more stable implementation of speech recognition

Pavel Lutskov
2019-02-02 13:38:35 +01:00
parent 3217b7f841
commit 655c5418fd
7 changed files with 77 additions and 279 deletions

CMakeLists.txt (View File)

@@ -47,6 +47,4 @@ catkin_install_python(PROGRAMS
)
add_executable(aruco_detector src/aruco_detector.cpp)
add_executable(speech src/speech.cpp)
target_link_libraries(aruco_detector ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(speech ${catkin_LIBRARIES} ${aruco_LIB})
target_link_libraries(aruco_detector ${catkin_LIBRARIES})

View File

@@ -8,13 +8,12 @@
        output="screen"/>
  <node name="speech_server" pkg="teleoperation" type="speech_server.py"
        output="screen"/>
  <node name="speech_client" pkg="teleoperation" type="speech_client.py"
        output="screen"/>
  <node name="controller" pkg="teleoperation" type="controller.py"
        output="screen"/>
  <node name="speech_client" pkg="teleoperation" type="speech_client.py"
        output="screen"/>
  <node name="imitator" pkg="teleoperation" type="imitator.py"
        output="screen"/>
  <node name="walker" pkg="teleoperation" type="walker.py"/>
  <node name="fall_detector" pkg="teleoperation" type="fall_detector.py"/>
</launch>

controller.py (View File)

@@ -51,7 +51,7 @@ def handle_request(r):
    elif module == 'speech':
        if message == 'recognize':
            if STATE in ('idle', 'imitate'):
            if STATE in ('idle', 'imitate', 'dead'):
                permission = True
        elif message == 'imitate':
            if STATE == 'idle':
@@ -61,6 +61,13 @@ def handle_request(r):
            if STATE == 'imitate':
                STATE = 'idle'
                permission = True
        elif message == 'kill':
            STATE = 'dead'
            permission = True
        elif message == 'revive':
            if STATE == 'dead':
                STATE = 'idle'
                permission = True
    rospy.logdebug(
        'GOT REQUEST FROM %s TO %s.\nPERMISSION: %s.\nSTATE IS NOW: %s.' % (
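
Taken together, the two hunks above turn the controller's speech branch into a small state machine: 'recognize' is permitted from every state, including 'dead'; 'kill' is accepted unconditionally; 'revive' only works from 'dead'; and the branch at the top of the second hunk is presumably the 'stop' handler that returns from 'imitate' to 'idle'. A condensed sketch of just these transitions; the function name speech_permission, the ROS service plumbing and the other modules' branches are not from the diff:

    # Sketch of the speech-module permission logic implied by the hunks above.
    # Only the transitions visible in the diff are modelled.
    STATE = 'idle'

    def speech_permission(message):
        """Return True if `message` is allowed, updating the global STATE."""
        global STATE
        if message == 'recognize':
            return STATE in ('idle', 'imitate', 'dead')
        if message == 'imitate' and STATE == 'idle':
            STATE = 'imitate'
            return True
        if message == 'stop' and STATE == 'imitate':
            STATE = 'idle'
            return True
        if message == 'kill':
            STATE = 'dead'
            return True
        if message == 'revive' and STATE == 'dead':
            STATE = 'idle'
            return True
        return False

Because 'kill' has no precondition, a recognized kill word always drops the controller into 'dead', where imitation requests are refused until a 'revive' is heard.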

speech_client.py (View File)

@@ -13,9 +13,15 @@ from controller import inform_controller_factory
in_progress = False
state = 'idle'
IMITATE = 'repeat'
KILL = 'kill'
REVIVE = 'go'
STOP = 'stop'
voc_state = {
    'idle': 'start',
    'imitate': 'stop'
    'idle': [IMITATE, KILL],
    'imitate': [STOP, KILL],
    'killed': [REVIVE]
}
_inform_controller = inform_controller_factory('speech')
@@ -23,10 +29,17 @@ _inform_controller = inform_controller_factory('speech')
def done_cb(_, result):
    global in_progress, state
    rospy.loginfo(result)
    if result.word == 'start' and _inform_controller('imitate'):
    rospy.loginfo('SPEECH CLIENT: {}'.format(result))
    if result is None:
        in_progress = False
        return
    if result.word == IMITATE and _inform_controller('imitate'):
        state = 'imitate'
    elif result.word == 'stop' and _inform_controller('stop'):
    elif result.word == STOP and _inform_controller('stop'):
        state = 'idle'
    elif result.word == KILL and _inform_controller('kill'):
        state = 'killed'
    elif result.word == REVIVE and _inform_controller('revive'):
        state = 'idle'
    in_progress = False
@@ -46,11 +59,10 @@ if __name__ == '__main__':
            client.cancel_goal()
            in_progress = False
            state = 'idle'
            continue
        else:
            if not in_progress:
                in_progress = True
                client.send_goal(RequestSpeechGoal([voc_state[state]]),
                client.send_goal(RequestSpeechGoal(voc_state[state]),
                                 done_cb)
        rospy.Rate(2).sleep()
        rospy.Rate(4).sleep()
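
The client now offers the recognizer only the words that make sense in its current state ('repeat'/'kill' when idle, 'stop'/'kill' while imitating, 'go' once killed) and guards done_cb against a None result from a cancelled goal. A minimal sketch of the resulting loop, assuming RequestSpeechGoal takes the vocabulary as a plain word list (as the hunk suggests); should_cancel() is a hypothetical placeholder for the preempt condition, and the _inform_controller permission calls that gate each transition in the real done_cb are left out:

    # Sketch of the speech client loop suggested by the hunks above.
    import rospy
    import actionlib
    from teleoperation.msg import RequestSpeechAction, RequestSpeechGoal

    IMITATE, STOP, KILL, REVIVE = 'repeat', 'stop', 'kill', 'go'
    voc_state = {'idle': [IMITATE, KILL],
                 'imitate': [STOP, KILL],
                 'killed': [REVIVE]}
    state, in_progress = 'idle', False

    def done_cb(_, result):
        global state, in_progress
        if result is not None:
            # map the recognized word to the next client state
            state = {IMITATE: 'imitate', STOP: 'idle',
                     KILL: 'killed', REVIVE: 'idle'}.get(result.word, state)
        in_progress = False

    def should_cancel():
        return False  # placeholder, not part of the original node

    if __name__ == '__main__':
        rospy.init_node('speech_client')
        client = actionlib.SimpleActionClient('speech_server', RequestSpeechAction)
        client.wait_for_server()
        while not rospy.is_shutdown():
            if should_cancel():
                client.cancel_goal()
                state, in_progress = 'idle', False
            elif not in_progress:
                in_progress = True
                # only the words valid in the current state are offered to ASR
                client.send_goal(RequestSpeechGoal(voc_state[state]), done_cb)
            rospy.Rate(4).sleep()

Raising the loop rate from 2 Hz to 4 Hz only shortens the reaction time between goals; the actual waiting for a word happens inside the action server.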

speech_server.py (View File)

@@ -10,7 +10,6 @@ from teleoperation.msg import RequestSpeechAction, RequestSpeechResult
speech_broker = None
almem = None
r = False
def request_speech(goal):
@@ -18,7 +17,9 @@ def request_speech(goal):
        sas.set_succeeded(RequestSpeechResult(word=''))
        return
    while not sas.is_preempt_requested() and not speech_detector.have_word():
    while (not sas.is_preempt_requested() and
           not speech_detector.have_word() and
           not rospy.is_shutdown()):
        rospy.Rate(10).sleep()
    if speech_detector.have_word():
@@ -40,16 +41,24 @@ class SpeechDetectorModule(ALModule):
        self.asr = ALProxy('ALSpeechRecognition')
        self.tts = ALProxy('ALTextToSpeech')
        self.asr.setLanguage('English')
        self.running = False
        almem.subscribeToEvent("WordRecognized",
                               "speech_detector",
                               "on_word_recognized")
        self.asr.pause(True)
        self._busy = False
    def start_speech(self, voc):
        if self.running:
    def get_status(self):
        print(almem.getData('ALSpeechRecognition/Status'))
    def start_speech(self, voc, resume=False):
        if self._busy != resume:
            return False
        if not resume:
            self.voc = voc
            self.asr.setVocabulary(voc, False)
            self.asr.subscribe(self.subid)
        self.asr.pause(False)
        self.running = True
        self._busy = True
        return True
    def have_word(self):
@@ -60,23 +69,26 @@ class SpeechDetectorModule(ALModule):
        self.recognized = None
        return result
    def stop_speech(self):
        if not self.running:
    def stop_speech(self, pause=False):
        if not self._busy:
            return
        self.asr.unsubscribe(self.subid)
        self.asr.pause(True)
        self.running = False
        if not pause:
            self.asr.unsubscribe(self.subid)
            self._busy = False
    def on_word_recognized(self, *_args):
        word, conf = almem.getData('WordRecognized')
        print(word, conf)
        if conf > 0.4:
            self.stop_speech()
            self.stop_speech(pause=True)
            self.tts.say(word)
            self.recognized = word
        else:
            self.stop_speech()
        else:
            self.stop_speech(pause=True)
            self.tts.say('I didn\'t understand. Please repeat')
            self.start_speech(self.voc)
            self.start_speech(self.voc, resume=True)
@@ -84,21 +96,16 @@ if __name__ == '__main__':
    rospy.init_node('speech_server')
    speech_broker = ALBroker('speech_broker', '0.0.0.0', 0,
                             os.environ['NAO_IP'], 9559)
    speech_detector = SpeechDetectorModule('speech_detector')
    almem = ALProxy('ALMemory')
    almem.subscribeToEvent("WordRecognized",
                           "speech_detector",
                           "on_word_recognized")
    speech_detector.asr.pause(True)
    speech_detector = SpeechDetectorModule('speech_detector')
    sas = actionlib.SimpleActionServer('speech_server', RequestSpeechAction,
                                       execute_cb=request_speech,
                                       auto_start=False)
    sas.start()
    while not rospy.is_shutdown():
        rospy.Rate(4).sleep()
    if speech_detector.running:
        speech_detector.stop_speech()
        rospy.Rate(1).sleep()
    while sas.is_active():
        pass
    speech_broker.shutdown()
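
The main change on the server side is that the NAOqi speech engine is no longer torn down after every word: subscription to WordRecognized now happens once in the constructor, a recognition merely calls stop_speech(pause=True), which pauses ALSpeechRecognition but keeps the subscription and vocabulary, and start_speech(self.voc, resume=True) picks it straight back up after the "please repeat" prompt; only a full stop unsubscribes. A small sketch of that pause/resume protocol with the proxy replaced by a stub, so the _busy bookkeeping can be run in isolation (FakeASR and Detector are illustrations, not the actual module):

    # Stub stands in for the ALSpeechRecognition proxy so the pause/resume
    # bookkeeping of SpeechDetectorModule can be exercised without a robot.
    class FakeASR(object):
        def setVocabulary(self, voc, spotting):
            print('vocabulary set to %s' % (voc,))
        def subscribe(self, name):
            print('%s subscribed' % name)
        def unsubscribe(self, name):
            print('%s unsubscribed' % name)
        def pause(self, paused):
            print('paused' if paused else 'listening')

    class Detector(object):
        def __init__(self):
            self.asr = FakeASR()
            self.asr.pause(True)      # engine kept around, just muted
            self._busy = False
            self.voc = None

        def start_speech(self, voc, resume=False):
            # fresh start only when idle, resume only when already busy
            if self._busy != resume:
                return False
            if not resume:            # a resume keeps vocabulary and subscription
                self.voc = voc
                self.asr.setVocabulary(voc, False)
                self.asr.subscribe('speech_detector')
            self.asr.pause(False)
            self._busy = True
            return True

        def stop_speech(self, pause=False):
            if not self._busy:
                return
            self.asr.pause(True)
            if not pause:             # a real stop also releases the engine
                self.asr.unsubscribe('speech_detector')
                self._busy = False

    d = Detector()
    d.start_speech(['repeat', 'kill'])   # goal arrives: listen for these words
    d.stop_speech(pause=True)            # low-confidence hit: mute, stay subscribed
    d.start_speech(d.voc, resume=True)   # listen again with the same vocabulary
    d.stop_speech()                      # confident word: fully release the ASR

The shutdown path also changed: request_speech now breaks out of its wait on rospy shutdown, the main thread waits for any active goal via sas.is_active(), and the ALBroker is shut down explicitly.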

walker.py (View File)

@@ -21,8 +21,8 @@ VMIN = 0.3
VMAX = 1.0
def thirdway(a, b):
    return a + (b - a) / 3
def n_way(a, b, n=3):
    return a + (b - a) / n
def global_init():
@@ -32,12 +32,12 @@ def global_init():
        x = json.load(f)
    cx, cy, cz = x['cr']
    FW = thirdway(cx, x['fw']), x['fw']
    BK = thirdway(cx, x['bk']), x['bk']
    LT = thirdway(cy, x['lt']), x['lt']
    RT = thirdway(cy, x['rt']), x['rt']
    LR = thirdway(cz, x['lr']), x['lr']
    RR = thirdway(cz, x['rr']), x['rr']
    FW = n_way(cx, x['fw']), x['fw']
    BK = n_way(cx, x['bk']), x['bk']
    LT = n_way(cy, x['lt']), x['lt']
    RT = n_way(cy, x['rt']), x['rt']
    LR = n_way(cz, x['lr'], 2), x['lr']
    RR = n_way(cz, x['rr'], 2), x['rr']
_inform_controller = inform_controller_factory('walker')
@@ -96,7 +96,8 @@ if __name__ == '__main__':
        if not any(movement):
            rospy.logdebug('WALKER: STOP')
            _inform_controller('stop')
            mp.move(0, 0, 0)
            # mp.move(0, 0, 0)
            mp.stopMove()
            continue
        permission = _inform_controller('move')
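
n_way generalizes the old thirdway helper: it returns the point 1/n of the way from a to b, so FW/BK/LT/RT keep the old one-third threshold while LR/RR now sit at the halfway point (n=2), farther from the calibrated centre. A quick worked example; the centre/extreme numbers are invented purely for illustration:

    def n_way(a, b, n=3):
        return a + (b - a) / n

    centre, extreme = 0.0, 6.0              # hypothetical calibration for one axis
    print(n_way(centre, extreme))           # 2.0 -> same as the old thirdway()
    print(n_way(centre, extreme, 2))        # 3.0 -> the new LR/RR threshold, halfway out

The stop branch also switches from commanding zero velocity with mp.move(0, 0, 0) to mp.stopMove(), which tells ALMotion to end the current move task.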

src/speech.cpp (View File)

@@ -1,226 +0,0 @@
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <ros/ros.h>
#include <string.h>
#include <std_srvs/Empty.h>
#include <naoqi_bridge_msgs/SpeechWithFeedbackActionGoal.h>
#include <actionlib_msgs/GoalStatusArray.h>
#include <naoqi_bridge_msgs/SetSpeechVocabularyActionGoal.h>
#include <naoqi_bridge_msgs/WordRecognized.h>
#include <std_msgs/Bool.h>
#include <std_msgs/String.h>
#include <teleoperation/InformController.h>
#include <teleoperation/utils.hpp>
using namespace std;
class Nao_control {
protected:
// ROS node handler
ros::NodeHandle nh_;
// Publisher for nao speech
ros::Publisher speech_pub;
// Publisher for nao vocabulary parameters
ros::Publisher voc_params_pub;
// Client for starting speech recognition
ros::ServiceClient recog_start_srv;
// Client for stopping speech recognition
ros::ServiceClient recog_stop_srv;
// Subscriber to speech recognition
ros::Subscriber recog_sub;
// Publisher for recognized commands
ros::ServiceClient ic;
bool imitating;
int speech_id_ctr;
public:
Nao_control() : imitating(false), speech_id_ctr(1) {
ROS_INFO("Constructor");
speech_pub = nh_.advertise<naoqi_bridge_msgs::SpeechWithFeedbackActionGoal>(
"/speech_action/goal", 1);
voc_params_pub= nh_.advertise<naoqi_bridge_msgs::SetSpeechVocabularyActionGoal>(
"/speech_vocabulary_action/goal", 1);
recog_start_srv=nh_.serviceClient<std_srvs::Empty>("/start_recognition");
recog_stop_srv=nh_.serviceClient<std_srvs::Empty>("/stop_recognition");
recog_sub=nh_.subscribe("/word_recognized", 1,
&Nao_control::speechRecognitionCallback, this);
ic = nh_.serviceClient<teleoperation::InformController>(
"/inform_controller", false);
}
~Nao_control() {
ROS_INFO("SPEECH: DESTRUCT");
std_srvs::Empty srv;
if (recog_stop_srv.call(srv)) {
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
}
else {
ROS_ERROR("COULDN'T STOP RECOGNITION");
}
}
void speechRecognitionCallback(
const naoqi_bridge_msgs::WordRecognized::ConstPtr& msg) {
ROS_INFO("A WORD WAS RECOGNIZED");
std_srvs::Empty srv;
ROS_INFO("CONFIDENCE: %lf", msg->confidence_values[0]);
for (int i = 0; i < msg->words.size(); i++) {
std::cout << msg->words[i] << std::endl;
}
//set pause duration
double f_pause = 2;
if (recog_stop_srv.call(srv) && ((msg->words.size())> 0)) {
ROS_INFO("SUCCESSFULLY STOPPED RECOGNITION");
// Use confidence level to decide whether the recognized word
// should be published
if (msg->confidence_values[0] > 0.35) {
ROS_INFO("SPEECH STARTING");
std::string say = "Ok I understood " + msg->words[0];
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
this->speech_id_ctr += 1;
s_msg.goal.say = say;
this->speech_pub.publish(s_msg);
teleoperation::InformController ic_msg;
ic_msg.request.module = "speech_recognition";
if (msg->words[0] == "imitate") {
ic_msg.request.message = "imitate";
ROS_INFO("SPEECH: REQUESTING IMITATION");
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
this->imitating = true;
}
else {
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
}
}
else if (msg->words[0] == "stop") {
ROS_INFO("SPEECH: REQUESTING STOP IMITATION");
ic_msg.request.message = "stop";
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
this->imitating = false;
}
else {
ROS_ERROR("SPEECH: CONTROLLER UNREACHABLE");
}
}
}
else if (msg->confidence_values[0] > 0.05) {
ROS_INFO("SPEECH STARTING");
std::string say = "I did not understand. Could you repeat that please";
naoqi_bridge_msgs::SpeechWithFeedbackActionGoal s_msg;
s_msg.goal_id.id = stuff_to_str(this->speech_id_ctr);
this->speech_id_ctr += 1;
s_msg.goal.say = say;
this->speech_pub.publish(s_msg);
// increase pause duration
f_pause = 0.4;
}
}
else {
ROS_ERROR("COULDN'T STOP RECOGNITION");
}
// pause until NAO stops talking
ros::Rate loop_rate(f_pause);
loop_rate.sleep();
// re-start recognition
commandRecognition();
}
void initializeVocabulary()
{
std::vector<std::string> vocabulary;
if (!this->imitating) vocabulary.push_back("imitate");
else vocabulary.push_back("stop");
vocabulary.push_back("open");
vocabulary.push_back("close");
// vocabulary.push_back("kill");
naoqi_bridge_msgs::SetSpeechVocabularyActionGoal msg;
msg.goal.words = vocabulary;
msg.goal_id.id = stuff_to_str(speech_id_ctr);
std::cout << msg.goal << std::endl;
speech_id_ctr += 1;
voc_params_pub.publish(msg);
ROS_INFO("VOCABULARY INITIALIZED");
}
void commandRecognition()
{
//recognition has to be started and ended once a valid command was found
while (true) {
ros::Rate loop_rate(4);
loop_rate.sleep();
teleoperation::InformController ic_msg;
ic_msg.request.module = "speech_recognition";
ic_msg.request.message = "recognize";
if (this->ic.call(ic_msg) && ic_msg.response.permission) {
break;
}
}
this->initializeVocabulary();
ros::Rate loop_rate(1);
loop_rate.sleep();
std_srvs::Empty srv;
if (recog_start_srv.call(srv)) {
ROS_INFO("SUCCESSFULLY STARTED RECOGNITION");
}
else {
ROS_ERROR("COULDN'T START RECOGNITION");
}
}
};
int main(int argc, char** argv) {
ros::init(argc, argv, "speech");
Nao_control TermiNAOtor;
ros::Rate loop_rate(1);
loop_rate.sleep();
TermiNAOtor.commandRecognition();
ROS_INFO("SPIN");
ros::spin();
return 0;
}