top of page

Week 5

Using MediaPipe for hand tracking and gesture recognition, and MQTT to control the robot

374983633_831163668603704_825140595785911113_n_edited.jpg

Overview

          Helped team 3 with hand tracking, and tried using MQTT to control the robot (real-time remote control).

Hand track (Python)

This is my hand-track Python code

------------------------------------------------------------------------------------

import cv2
import mediapipe as mp
import math

# Initialize the MediaPipe Hands solution; `hands` is the detector used by the
# capture loop below to extract 21 landmarks per detected hand.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()  # NOTE(review): default constructor settings — confirm max-hands/confidence defaults

# Define robot control functions (replace with actual robot control code)
def robot_change_function():
    """Placeholder hook: switch the robot into 'ARUCO' mode (prints only)."""
    msg = "Robot Change function to 'ARUCO'"
    print(msg)

def robot_stand():
    """Placeholder hook: command the robot to stand up (prints only)."""
    msg = "Robot standup"
    print(msg)

def robot_sit():
    """Placeholder hook: command the robot to sit / stand down (prints only)."""
    msg = "Robot standdown"
    print(msg)

def robot_turn_left():
    """Placeholder hook: command a left turn (prints only)."""
    msg = "Robot turning left"
    print(msg)

def robot_turn_right():
    """Placeholder hook: command a right turn (prints only)."""
    msg = "Robot turning right"
    print(msg)

def robot_follow():
    """Placeholder hook: start follow mode (prints only)."""
    msg = "Robot start follow"
    print(msg)

def robot_stop():
    """Placeholder hook: stop the robot (prints only)."""
    msg = "Robot stop"
    print(msg)

def _classify_gesture(xs, ys):
    """Map hand-landmark extremes to a robot command.

    xs, ys: lists of the 21 normalized landmark x/y coordinates.
    Returns (label, colour, action) for the first matching rule, or None.
    Rules are checked in the same order as the original if/elif chain, so
    precedence is unchanged.
    """
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    # Thumb tip (landmark 4) at either horizontal extreme; the original had
    # duplicated "...Right"/"...Left" branches with identical actions.
    thumb_extreme_x = xs[4] == max_x or xs[4] == min_x
    green, red = (0, 255, 0), (0, 0, 255)

    # Middle fingertip (12) highest + thumb at a side -> switch to Aruco mode.
    if ys[12] == min_y and thumb_extreme_x:
        return "Aruco", green, robot_change_function
    # Index fingertip (8) highest -> stand up.
    if ys[8] == min_y and thumb_extreme_x:
        return "Stand", green, robot_stand
    # Index fingertip lowest -> sit down.
    if ys[8] == max_y and thumb_extreme_x:
        return "Sit", green, robot_sit
    # Thumb tip highest + index fingertip rightmost -> turn right.
    if ys[4] == min_y and xs[8] == max_x:
        return "Right", green, robot_turn_right
    # Thumb tip highest + index fingertip leftmost -> turn left.
    if ys[4] == min_y and xs[8] == min_x:
        return "Left", green, robot_turn_left
    # Middle fingertip lowest + thumb at a side -> follow mode.
    if ys[12] == max_y and thumb_extreme_x:
        return "Follow", green, robot_follow
    # Middle-finger knuckle (9/10) highest -> stop.
    if ys[10] == min_y or ys[9] == min_y:
        return "Stop", red, robot_stop
    return None


# Initialize the webcam (device 0).
cap = cv2.VideoCapture(0)

while True:
    # Read one frame; stop if the camera stream ends.
    ret, frame = cap.read()
    if not ret:
        break

    # Mirror the image so on-screen motion matches the user's motion.
    frame = cv2.flip(frame, 1)
    height, width, _ = frame.shape

    # Draw the X (horizontal) and Y (vertical) reference axes with labels.
    cv2.line(frame, (0, height // 2), (width, height // 2), (0, 0, 255), 1)
    cv2.line(frame, (width // 2, 0), (width // 2, height), (0, 0, 255), 1)
    cv2.putText(frame, "+X", (width - 20, height // 2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.putText(frame, "-X", (5, height // 2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.putText(frame, "+Y", (width // 2 - 20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.putText(frame, "-Y", (width // 2 - 20, height - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

    # MediaPipe expects RGB; OpenCV delivers BGR.
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Draw each landmark as a dot labelled with its index.
            for idx, landmark in enumerate(hand_landmarks.landmark):
                px, py = int(landmark.x * width), int(landmark.y * height)
                cv2.circle(frame, (px, py), 5, (0, 255, 0), -1)
                cv2.putText(frame, str(idx), (px, py), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            # Extract coordinates once.  The original recomputed min()/max()
            # over all 21 landmarks inside every elif branch (up to ~20 full
            # passes per frame) and also computed several unused values
            # (fingers_extended, x1, x2, thumb_x4, index_x8) — all removed.
            xs = [lm.x for lm in hand_landmarks.landmark]
            ys = [lm.y for lm in hand_landmarks.landmark]

            gesture = _classify_gesture(xs, ys)
            if gesture is not None:
                label, colour, action = gesture
                cv2.putText(frame, label, (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, colour, 2)
                action()

    # Show the annotated frame.
    cv2.imshow("Hand Detection", frame)

    # Exit the loop when 'e' is pressed.
    if cv2.waitKey(1) & 0xFF == ord('e'):
        break

# Release the webcam and close OpenCV windows.
cap.release()
cv2.destroyAllWindows()

-----------------------------------------------------------------------------------

Result of the code

1.) Change Mode to Aruco

 

 

 

 

 

 

 

2.) Sit-Down

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

3.) Stand-UP

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

4.) Turn Left

 

 

 

 

 

 

 

 

 

 

5.) Turn Right

 

 

 

 

 

 

 

 

 

6.) Stop

 

 

MQTT

Step 1: Install the MQTT client tools with the following commands (the second command subscribes to every topic, which lets you verify that the broker is reachable)

  • $ sudo apt-get install mosquitto-clients

  • $ mosquitto_sub -v -t "#"

Step 2: Connect to the Robot's Wi-Fi hotspot (Need to do this step every time that you want to connect with the robot)

Step 3: Now you can use these example commands on the terminal to control the robot remotely through Mqtt

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "standUp"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "standDown"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "run"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "walk"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "climb"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "dance1"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "dance2"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "jumpYaw"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "straightHand1"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "backflip"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "dance3"

  • $ mosquitto_pub -h 192.168.12.1 -d -t "controller/action" -m "dance4"

Step 4: After testing some example commands now you can try to run my Python code to control the robot remotely with your keyboard (w a s d) through Mqtt

------------------------------------------------------------------------------------

#!/usr/bin/python

import sys
import time
import math

sys.path.append('../lib/python/arm64')
import robot_interface as sdk

""" 
mode
0: idle default stand
1: force stand
2: target velocity walking (controlled by velocity + yawSpeed)
3: target position walking (controlled by position + ypr[0])
4: path mode walking (reserve for future release)
5: position stand down.
6: position stand up
7: damping mode
8: recovery stand
9: backflip
10: jumpYaw
11: straightHand
12: dance1
13: dance2
"""
"""
gaitType
0: idle 
1: trot 
2: trot running 
3: climb stair 
4: trot obstacle
"""
"""
speedLevel
0. default low speed. 1. medium speed 2. high speed. during walking, only respond MODE 3
"""
"""
velocity
(forwardSpeed, sideSpeed in body frame)
"""
"""
yawSpeed
(unit: rad/s), rotateSpeed in body frame
"""
"""
footRaiseHeight
(unit: m, default: 0.08m), foot up height while walking, delta value
"""
"""
euler
(unit: rad), (roll, pitch, yaw) in stand mode
"""
"""
bodyHeight
(unit: m, default: 0.28m), delta value
"""

def command(cmd, udp, state, mode = 0, gaitType = 0, speedLevel = 0, footRaiseHeight = 0, bodyHeight = 0, euler = None, velocity = None, yawSpeed = 0):
    """Fill the high-level command `cmd` and drain one incoming state packet.

    Field meanings are documented in the module-level comments above.
    `euler` defaults to [0, 0, 0] and `velocity` to [0, 0].  The caller is
    responsible for the actual transmission via udp.SetSend(cmd)/udp.Send().
    """
    # Bug fix: the defaults were mutable lists ([0, 0, 0] / [0, 0]) shared
    # across all calls and aliased into cmd.euler/cmd.velocity; build a fresh
    # list per call instead (None-sentinel idiom).
    if euler is None:
        euler = [0, 0, 0]
    if velocity is None:
        velocity = [0, 0]
    time.sleep(0.02)  # throttle the command rate
    udp.Recv()
    udp.GetRecv(state)
    cmd.mode = mode
    cmd.gaitType = gaitType
    cmd.speedLevel = speedLevel
    cmd.footRaiseHeight = footRaiseHeight
    cmd.bodyHeight = bodyHeight
    cmd.euler = euler
    cmd.velocity = velocity
    cmd.yawSpeed = yawSpeed
    cmd.reserve = 0

 

if __name__ == '__main__':

    HIGHLEVEL = 0xee
    LOWLEVEL  = 0xff

    # High-level UDP link: local port 8080 -> robot at 192.168.123.161:8082.
    udp = sdk.UDP(HIGHLEVEL, 8080, "192.168.123.161", 8082)

    cmd = sdk.HighCmd()
    state = sdk.HighState()
    udp.InitCmdData(cmd)

    motiontime = 0

    # Keyboard teleop loop: w/s = forward/backward, a/d = turn (with a small
    # forward component), e = stand down, r = force stand, q = quit.
    # NOTE: an unrecognised key re-sends the previous cmd unchanged.
    while True:
        key = input("กดปุ่ม (w/a/s/d) หรือ q เพื่อออก: ")
        motiontime = motiontime + 1
        if key == 'w':
            command(cmd, udp, state, mode = 2, velocity = [0.4, 0])
        elif key == 's':
            command(cmd, udp, state, mode = 2, velocity = [-0.4, 0])
        elif key == 'a':
            command(cmd, udp, state, mode = 2, yawSpeed = 0.4, velocity = [0.2, 0])
        elif key == 'd':
            command(cmd, udp, state, mode = 2, yawSpeed = -0.4, velocity = [0.2, 0])
        elif key == 'e':
            command(cmd, udp, state, mode = 5)
        elif key == 'r':
            command(cmd, udp, state, mode = 1)
        elif key == 'q':
            # Bug fix: the original broke out of the loop BEFORE the
            # SetSend/Send at the bottom, so the final idle command was
            # never transmitted to the robot.  Send it, then exit.
            command(cmd, udp, state)
            udp.SetSend(cmd)
            udp.Send()
            break
        udp.SetSend(cmd)
        udp.Send()

 

------------------------------------------------------------------------------------

bottom of page