Hi all,
I am working on the second part of ROS2 “real robot” wall-following project.
Following is the intended behavior:
Run wall_finder node (service server) that aligns the robot to the nearest wall, approaches to ~0.30 m, rotates so the wall is on the robot’s right.
Run wall_follower node (client) that first calls the find_wall service and only after the service completes starts publishing for right-wall following.
Following is my issue (with the FastBot in Small city lab):
If I launch wall_finder (server) first: it starts and shows “server ready”.
When I launch wall_follower (client): it receives LaserScan continuously and prints scan values for ranges, but the robot does not move at all.
Here’s the part that I don’t understand: as soon as I press Ctrl+C to stop the client process, the robot suddenly starts moving (it looks like the server-side wall-finding motion begins right after the client exits).
In simulation, everything runs correctly (client calls service, server moves robot, client then follows wall) but on the real robot the behavior is strange and I suspect my service-client/executor behavior (or timing/discovery) of causing some sort of a block.
What could cause the real robot to only move after the client is killed, even though the service server is “ready” and topics appear correct?
I am pasting the relevant structure from my wall_follower node (client). I’m using MultiThreadedExecutor(num_threads=4) and ReentrantCallbackGroup .
Blockquote
Setup in __init__
Create /cmd_vel publisher
Create service client /find_wall
Create a timer to try calling the service (non-blocking)
Create /scan subscriber
Create control timer (wall-following loop)
Service call is done through a timer
try_call_find_wall() runs every 0.5 sec:
If not aligned and not already waiting for a future → calls call_async()
Adds a done-callback to set self.aligned=True when the server finishes
Control callback is blocked until service finishes
control_callback() publishes zero velocities until self.aligned=True
Here is the exact relevant structure (same as my code):
Hello @ninadmehta34 ,
It seems the issue is with the fact that all your callbacks are in the same group. So even if you have a multithreaded executor if the callback group is the same and it is one assigned by default. No proper multi-threading occurs.
Yes, please share. we need to verify where the /cmd_vel publishing is happening. If your service callback is publishing /cmd_vel commands while waiting for the alignment to complete, it will block and never return a response to the client.
The correct pattern is:
The service callback should only return a response (True/False) immediately
The /cmd_vel publishing should happen in a separate timer callback, not inside the service callback
#! /usr/bin/python3
import math
import rclpy
import time # Added for the loop delay
from rclpy.node import Node
from geometry_msgs.msg import Twist
from sensor_msgs.msg import LaserScan
from custom_interfaces.srv import FindWall
from rclpy.executors import MultiThreadedExecutor
from rclpy.callback_groups import ReentrantCallbackGroup
class WallFinder(Node):
def __init__(self):
super().__init__("Wall_Finder_test")
self.get_logger().info("Initializing wall_finder")
#reentrant group
self.reentrant_group_1 = ReentrantCallbackGroup()
self.reentrant_group_2 = ReentrantCallbackGroup()
#declare and initialize cmd_vel publisher
self.cmd_vel_pub = self.create_publisher(Twist,"/cmd_vel",10)
self.get_logger().info("Initialized /cmd_vel Publisher")
self.scan_sub = self.create_subscription(LaserScan, "/scan", self.scan_callback, 10, callback_group=self.reentrant_group_1)
# self.control_timer = self.create_timer(0.1, self.control_callback)
self.srv = self.create_service(FindWall, 'find_wall', self.find_wall_callback, callback_group=self.reentrant_group_2)
self.scan_ready = False
self.latest_scan = None
#going by stages: face_wall, approach, rotate_right, done
self.stage = "face_wall"
self.distance_from_wall = 0.3 # meters
self.distance_from_wall_tolerance = 0.01
self.angle_tolerance_rad = math.radians(2.0) #stop when within +-5
self.turn_speed = 0.2
self.linear_vel = 0.05
self.twist_cmd = Twist()
self.get_logger().info("Find wall service ready...")
#define scan callback to get the ranges and get the minimum range out of all
def scan_callback(self, scan_msg):
# Always keep the latest scan
self.latest_scan = scan_msg
self.scan_ready = True
if not self.scan_ready:
self.scan_ready = True
self.get_logger().info("Scan initialized")
def find_wall_callback(self, request, response):
self.get_logger().info("Service Started: Finding wall...")
self.stage = "face_wall"
while self.stage != "done":
# Safety: wait for scan to initialize
if not self.scan_ready or self.latest_scan is None:
self.get_logger().info("Waiting for scan...")
time.sleep(0.1)
continue
scan = self.latest_scan
#below was a sanity check for what index the right side of robot is at.
# for idx in [0, 180, 270, 360, 540, 719]:
# ang = scan.angle_min + idx*scan.angle_increment
# self.get_logger().info(f"idx {idx} -> {math.degrees(ang):.1f} deg")
# 1. Find global minimum valid range and its index
best_range = float('inf')
best_index = None
for idex, rnge in enumerate(scan.ranges):
if math.isinf(rnge) or math.isnan(rnge):
continue
# Below is to skip readings that are likely the robot's own body (noise)
# Simulation doesn't have this, but real robot might.
if rnge < 0.15:
continue
if rnge < best_range:
best_range = rnge
best_index = idex
# Handle case where no wall is seen
if best_index is None:
self.get_logger().warn("No valid LaserScan ranges found beyond 0.15m.")
self.publish_cmd(0.0, 0.0)
time.sleep(0.1)
continue
best_angle = scan.angle_min + best_index * scan.angle_increment
best_angle = (best_angle + math.pi) % (2 * math.pi) - math.pi # normalize [-pi, pi]
#STAGE 1 Face the Wall
if self.stage == "face_wall":
# Check if the closest point is within our tolerance (e.g. 10 degrees)
if abs(best_angle) <= self.angle_tolerance_rad:
self.publish_cmd(0.0, 0.0)
self.get_logger().info(
f"STAGE 1 COMPLETE: Facing wall. Min={best_range:.2f}m angle={math.degrees(best_angle):.1f} deg"
)
self.stage = "approach"
else:
# If best_angle is positive, wall is to our left -> turn left (positive speed)
# If best_angle is negative, wall is to our right -> turn right (negative speed)
angular_speed = self.turn_speed if best_angle > 0 else -self.turn_speed
self.publish_cmd(0.0, angular_speed)
#Stage 2: Approach to 0.3 m
elif self.stage == "approach":
front_dist = scan.ranges[0]
if math.isinf(front_dist) or math.isnan(front_dist):
self.publish_cmd(0.0, 0.0)
self.get_logger().warn("Front distance invalid. Stopping.")
continue
if front_dist > (self.distance_from_wall + self.distance_from_wall_tolerance):
self.publish_cmd(self.linear_vel, 0.0)
else:
self.publish_cmd(0.0, 0.0)
self.get_logger().info(f"Reached ~0.3m from wall. Front={front_dist:.2f}m")
self.stage = "rotate_right_align"
# STAGE 3: rotate until wall is on right
elif self.stage == "rotate_right_align":
# Target for Right Side is -90 degrees (-1.57 radians)
target_angle = -1.57
angle_tolerance = math.radians(2.0)
# Calculate how far we are from the target angle
error = best_angle - target_angle
if abs(error) < angle_tolerance:
self.publish_cmd(0.0, 0.0)
self.stage = "done"
self.get_logger().info(f"Stage 3 Complete: Wall aligned at {math.degrees(best_angle):.1f} deg")
else:
self.publish_cmd(0.0, self.turn_speed)
time.sleep(0.05) # Prevent CPU hogging
# Final Response
response.wallfound = True
self.get_logger().info("Service Finished: Wall Found and Aligned.")
return response
def publish_cmd(self, linear_vel, angular_speed):
twist_cmd =Twist()
twist_cmd.linear.x = float(linear_vel)
twist_cmd.angular.z = float(angular_speed)
self.cmd_vel_pub.publish(twist_cmd)
def main(args=None):
rclpy.init(args=args)
find_wall_node=WallFinder()
#Use multithread executor
executor = MultiThreadedExecutor(num_threads=3)
executor.add_node(find_wall_node)
try:
executor.spin()
except KeyboardInterrupt:
pass
finally:
find_wall_node.cmd_vel_pub.publish(Twist()) #stop robot on shutdown
find_wall_node.destroy_node()
rclpy.shutdown()
if __name__ == "__main__":
main()
There is publishing of cmd_vel value in this… I think this require re-implementation. Otherwise, as it stands every case the involves the call to publish will cause the response line to be not reached until the callback is forced to exit.
You are right! I missed that the control callback keeps calling cmd_vel while my service server is also calling it to align it with the wall. I think what was happening was:
In the control_callback, “if not self.aligned” and “if not self.scan_info_done” were interfering with the server’s cmd_vel and eventually causing the robot to not move at all (this was the explanation I could come up with).
Remove cmd_vel from those two fixed the issue!
Thank you for the help @takavarasha. Its working fine now.