diff --git a/.gitignore b/.gitignore
index 3adaf1f..454d569 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,13 +16,16 @@ CMakeLists.txt.user
 # Ignore PDFs on master
 literature/
 
-# Pictures stuff
+# Pictures
 *.png
-# But not the whole scheme
-!teleoperation_overview.png
+# But not the necessary ones
+!docs/figures/**/*
 
 # Presentation stuff
 *.pptx
 
 # fuck apple
 .DS_Store
+
+# LaTeX stuff
+build/
diff --git a/docs/figures/aruco.png b/docs/figures/aruco.png
new file mode 100644
index 0000000..9c45960
Binary files /dev/null and b/docs/figures/aruco.png differ
diff --git a/docs/figures/interface_nao.png b/docs/figures/interface_nao.png
new file mode 100644
index 0000000..70189b9
Binary files /dev/null and b/docs/figures/interface_nao.png differ
diff --git a/docs/figures/joint_jacobian.png b/docs/figures/joint_jacobian.png
new file mode 100644
index 0000000..693e025
Binary files /dev/null and b/docs/figures/joint_jacobian.png differ
diff --git a/docs/joint_jacobian.xml b/docs/figures/joint_jacobian.xml
similarity index 100%
rename from docs/joint_jacobian.xml
rename to docs/figures/joint_jacobian.xml
diff --git a/docs/posture_retargeting.xml b/docs/figures/posture_retargeting.xml
similarity index 100%
rename from docs/posture_retargeting.xml
rename to docs/figures/posture_retargeting.xml
diff --git a/docs/figures/rviz_human.png b/docs/figures/rviz_human.png
new file mode 100644
index 0000000..fa333fa
Binary files /dev/null and b/docs/figures/rviz_human.png differ
diff --git a/docs/teleoperation_overview.png b/docs/figures/teleoperation_overview.png
similarity index 100%
rename from docs/teleoperation_overview.png
rename to docs/figures/teleoperation_overview.png
diff --git a/docs/teleoperation_overview.xml b/docs/figures/teleoperation_overview.xml
similarity index 100%
rename from docs/teleoperation_overview.xml
rename to docs/figures/teleoperation_overview.xml
diff --git a/docs/figures/usr_pt.png b/docs/figures/usr_pt.png
new file mode 100644
index 0000000..57d1ff2
Binary files /dev/null and b/docs/figures/usr_pt.png differ
diff --git a/docs/Joystick_TopDown.xml b/docs/figures/usr_pt.xml
similarity index 100%
rename from docs/Joystick_TopDown.xml
rename to docs/figures/usr_pt.xml
diff --git a/docs/report.latex b/docs/report.latex
new file mode 100644
index 0000000..7a8ade9
--- /dev/null
+++ b/docs/report.latex
@@ -0,0 +1,331 @@
+\documentclass[conference]{IEEEtran}
+
+% \IEEEoverridecommandlockouts
+% The preceding line is only needed to identify funding in the first footnote.
+% If that is unneeded, please comment it out.
+
+\usepackage{cite}
+\usepackage{amsmath,amssymb,amsfonts}
+% \usepackage{algorithmic}
+\usepackage{graphicx}
+\usepackage{textcomp}
+\usepackage{xcolor}
+\usepackage{subcaption}
+\usepackage{todonotes}
+
+\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
+    T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
+
+\begin{document}
+
+\title{Humanoid Robotic Systems - ``Teleoperating NAO''}
+
+\author{Pavel Lutskov, Luming Li, Lukas Otter and Atef Kort}
+
+\maketitle
+
+\section{Project Description}
+
+This semester, the task of our group was to program a routine for the
+teleoperation of a NAO robot. Using ArUco markers placed on the operator's
+chest and hands, the position and posture of the operator are determined by
+detecting the markers' locations with a webcam; appropriate commands are then
+sent to the robot to imitate the motions of the operator. An overview of the
+process is shown in Fig.~\ref{fig:overview}.
+The main takeaway from fulfilling this objective was practicing the skills we
+acquired during the Humanoid Robotic Systems course and getting familiar with
+the NAO robot as a research and development platform.
+
+\begin{figure}[h]
+  \centering
+  \includegraphics[width=\linewidth]{figures/teleoperation_overview.png}
+  \caption{Overview of the defined states and their transitions.}
+  \label{fig:overview}
+\end{figure}
+
+In closer detail, once the markers are detected, their coordinates relative
+to the webcam are extracted. The position and the orientation of the user's
+chest marker are used to control the movement of the NAO around the
+environment. We call this approach a ``Human Joystick'' and describe it in
+more detail in Section~\ref{ssec:navigation}.
+
+The relative locations of the chest and hand markers can be used to determine
+the coordinates of the user's end effectors (i.e.\ hands) in the user's chest
+frame. In order for the NAO to imitate the arm motions, these coordinates
+need to be appropriately remapped into the NAO torso frame. With the desired
+coordinates of the hands known, the commands for the NAO joints can be
+calculated using the Cartesian control approach. We present a thorough
+discussion of the issues we had to solve and the methods we used for arm
+motion imitation in Section~\ref{ssec:imitation}.
+
+Furthermore, to enable intuitive teleoperation, a user interface needed to be
+developed. Our system presents the operator with a current estimate of the
+operator's pose, the robot's pose reconstructed from sensor feedback, the
+camera feeds from both of the NAO's cameras, and the webcam view of the
+operator. To let the user give explicit commands to the robot, such as a
+request to open or close the hands or to temporarily suspend the operation,
+we implemented a simple voice command system. Finally, to accommodate
+different users and different operating conditions, a small calibration
+routine was developed, which quickly takes the user through the process of
+setting up the teleoperation. We elaborate on the tools and approaches used
+to implement the user-facing features in Section~\ref{ssec:interface}.
+
+An example task that can be accomplished with our teleoperation package is
+the following: the operator safely and precisely navigates the robot through
+an uncharted environment with a high number of obstacles to some lightweight
+object, such as an empty bottle, then makes the robot pick up that object and
+bring it back to the operator. Thanks to the high precision of the arm
+motions and the constant operator input, the robot is able to pick up objects
+of different shapes and sizes, applying different strategies when needed. We
+demonstrate the functioning of our system in the supporting video.
+
+We used ROS as the framework for our implementation. ROS is a
+well-established software framework for developing robot-targeted
+applications, with a rich support infrastructure and a modular approach to
+organizing logic. For interacting with the robot we mainly relied on the
+NAOqi Python API. The advantage of using Python over C++ is a much higher
+speed of development and more concise, readable code.
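+
+As a rough illustration of this architecture, the sketch below shows how a
+node in our package can wrap a NAOqi proxy behind a ROS interface. The node
+name, topic name and robot address are illustrative assumptions, not the
+exact ones from our package.
+
+\begin{verbatim}
+# Sketch: a ROS node wrapping the NAOqi Python API.
+# Names and addresses below are illustrative only.
+import rospy
+from std_msgs.msg import String
+from naoqi import ALProxy
+
+def main():
+    rospy.init_node('nao_teleop_example')
+    # Connect to the robot's motion module via NAOqi.
+    motion = ALProxy('ALMotion', 'nao.local', 9559)
+
+    def on_command(msg):
+        # Map a simple text command to a NAOqi call.
+        if msg.data == 'wake':
+            motion.wakeUp()
+        elif msg.data == 'rest':
+            motion.rest()
+
+    rospy.Subscriber('/teleop/command', String, on_command)
+    rospy.spin()
+
+if __name__ == '__main__':
+    main()
+\end{verbatim}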
+
+\section{System Overview}
+
+\subsection{Vision}\label{ssec:vision}
+
+The vision pipeline consists of the following steps:
+\begin{itemize}
+  \item camera calibration,
+  \item ArUco marker extraction,
+  \item publishing of the world coordinates on TF.
+\end{itemize}
+
+\begin{figure}
+  \centerline{\includegraphics[width=0.8\linewidth]{figures/aruco.png}}
+  \caption{ArUco marker detection on the operator.}
+  \label{fig:aruco_detection}
+\end{figure}
+
+\subsection{Interface}\label{ssec:interface}
+
+\paragraph{Speech State Machine}
+
+The speech state machine is based on the NAOqi API and the NAO's built-in
+voice recognition. The available commands, and the states in which they are
+accepted, are listed in Table~\ref{tab_speech_states}.
+
+\begin{table}
+\caption{Commands of the speech recognition module}
+\begin{center}
+\begin{tabular}{|c|c|c|}
+\hline
+\textbf{Command}&\textbf{Action}&\textbf{Available in state} \\
+\hline
+``Go'' & Wake up & Sleep \\
+\hline
+``Kill'' & Go to sleep & Idle, Imitation \\
+\hline
+``Arms'' & Start imitation & Idle \\
+\hline
+``Stop'' & Stop imitation & Imitation \\
+\hline
+``Open'' & Open hands & Idle, Imitation \\
+\hline
+``Close'' & Close hands & Idle, Imitation \\
+\hline
+\end{tabular}
+\label{tab_speech_states}
+\end{center}
+\end{table}
+
+\paragraph{Teleoperation Interface}
+
+To make it possible to operate the NAO without direct visual contact, a
+teleoperation interface was developed. This interface allows the operator to
+receive visual feedback from the NAO as well as additional information
+regarding the operator's own position.
+
+The NAO part contains the video streams of the top and bottom cameras on the
+robot's head. These were created by subscribing to their respective topics
+(FIND NAME) using the \textit{rqt\_gui} package. It also contains an rviz
+window which gives a visual representation of the NAO. For this, the robot's
+joint positions are displayed by subscribing to the \textit{tf} topic, on
+which the coordinates of the different coordinate frames are published. We
+further used the \textit{nao\_meshes} package to create the 3D model of the
+NAO.
+
+\begin{figure}
+  \centering
+  \begin{subfigure}[b]{0.4\linewidth}
+    \includegraphics[width=\linewidth]{figures/rviz_human.png}
+    \caption{}
+    \label{fig_human_model}
+  \end{subfigure}
+  \begin{subfigure}[b]{0.4\linewidth}
+    \includegraphics[width=\linewidth]{figures/interface_nao.png}
+    \caption{}
+    \label{fig_nao_model}
+  \end{subfigure}
+  \caption{Operator and NAO in rviz.}
+  \label{fig_interface}
+\end{figure}
+
+\subsection{Navigation}\label{ssec:navigation}
+
+One of the two main features of our system is an intuitive navigation tool,
+the ``Human Joystick'', which allows the robot to navigate the environment by
+tracking the user's movements in three degrees of freedom.
+
+By fixing an ArUco marker on the user's chest, we can continuously track the
+marker's position and orientation in three-dimensional space and thereby
+capture the user's motion.
+
+To simplify the task, we define a buffer zone within which only the
+orientation of the user is tracked. Depending on the direction in which the
+user exits this zone, the robot goes forward, backward, left or right. The
+covered distance also influences the speed: the further the user is from the
+center of the buffer zone, the faster the robot moves. The extent of the
+movement and of the buffer zone are determined automatically through
+calibration. A minimal sketch of this mapping is given below.
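+
+The following sketch illustrates the mapping from chest displacement to a
+walking command. The function, its thresholds and the use of
+\texttt{ALMotion.moveToward} reflect the general approach described above,
+but the names and values are illustrative assumptions, not a verbatim
+excerpt from our package.
+
+\begin{verbatim}
+# Sketch of the "Human Joystick" mapping.
+# x: forward offset, y: lateral offset (meters) of the
+# chest marker from the calibrated center position.
+def joystick_command(x, y, reach, dead_zone):
+    mag = max(abs(x), abs(y))
+    if mag < dead_zone:
+        # Buffer zone: only orientation is tracked.
+        return 0.0, 0.0
+    # Speed grows with the distance covered beyond the
+    # buffer zone, normalized to [0, 1].
+    s = min((mag - dead_zone) / (reach - dead_zone), 1.0)
+    if abs(x) >= abs(y):
+        return (s if x > 0 else -s), 0.0   # forward/back
+    return 0.0, (s if y > 0 else -s)       # left/right
+
+# Example: chest 25 cm in front of the center.
+# vx, vy = joystick_command(0.25, 0.0, 0.4, 0.1)
+# motion.moveToward(vx, vy, 0.0)
+\end{verbatim}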
+
+\begin{figure}
+  \centering
+  \includegraphics[width=0.8\linewidth]{figures/usr_pt.png}
+  \caption{User position tracking model.}
+  \label{fig_user_tracking}
+\end{figure}
+
+\subsection{Imitation}\label{ssec:imitation}
+
+One of the main objectives of our project was the imitation of the operator's
+arm motions by the NAO. To perform this, first the appropriate mapping
+between the relative locations of the detected ArUco markers and the desired
+hand positions of the robot needs to be calculated. Then, based on the target
+coordinates, the robot's joint rotations need to be computed.
+
+\paragraph{Posture retargeting}
+
+First, let us define the notation for the coordinates that we will use to
+describe the posture retargeting procedure. Let $r$ denote the 3D $(x, y, z)$
+coordinates; the subscript names the object which has these coordinates, and
+the superscript names the coordinate frame in which these coordinates are
+taken. So, for example, $r_{hand,NAO}^{torso,NAO}$ gives the coordinates of
+the hand of the NAO robot in the frame of the robot's torso.
+
+After the ArUco markers are detected and published on ROS TF, as described in
+Section~\ref{ssec:vision}, we have the three vectors
+$r_{aruco,chest}^{webcam}$, $r_{aruco,lefthand}^{webcam}$ and
+$r_{aruco,righthand}^{webcam}$. We describe the retargeting for one hand,
+since it is symmetrical for the other hand. We also assume that all
+coordinate systems have the same orientation, with the z-axis pointing
+upwards, the x-axis pointing straight into the webcam and the y-axis to the
+left of the webcam. Therefore, we can directly calculate the hand position in
+the user's chest frame by means of the following equation:
+
+$$r_{hand,user}^{chest,user} = r_{aruco,hand}^{webcam} -
+r_{aruco,chest}^{webcam}$$
+
+Next, we remap the hand coordinates in the chest frame into the user's
+shoulder frame, using the following relation:
+
+$$r_{hand,user}^{shoulder,user} = r_{hand,user}^{chest,user} -
+r_{shoulder,user}^{chest,user}$$
+
+We know the coordinates of the user's shoulder in the user's chest frame from
+the calibration procedure described in Section~\ref{ssec:interface}.
+
+Now, we retarget the user's hand coordinates to the desired hand coordinates
+of the NAO in the NAO's shoulder frame with the following formula:
+
+$$r_{hand,NAO}^{shoulder,NAO} =
+\frac{L_{arm,NAO}}{L_{arm,user}} r_{hand,user}^{shoulder,user}$$
+
+As before, we know the length of the user's arm through calibration and the
+length of the NAO's arm from the specification provided by the manufacturer.
+
+The final step of the posture retargeting is to obtain the coordinates of the
+end effector in the torso frame. This can be done through the following
+relation:
+
+$$r_{hand,NAO}^{torso,NAO} =
+r_{hand,NAO}^{shoulder,NAO} + r_{shoulder,NAO}^{torso,NAO}$$
+
+The coordinates of the NAO's shoulder in the NAO's torso frame can be
+obtained through a call to the NAOqi API.
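+
+The whole chain reduces to a few vector operations. The sketch below mirrors
+the four equations above one-to-one; the variable names are our own
+illustrative choices.
+
+\begin{verbatim}
+import numpy as np
+
+# Posture retargeting for one hand. All position inputs
+# are 3D numpy arrays (meters); arm lengths are scalars.
+def retarget_hand(r_hand_cam, r_chest_cam,
+                  r_shoulder_in_chest,
+                  l_arm_nao, l_arm_user,
+                  r_shoulder_in_torso):
+    # Hand in the user's chest frame.
+    r_chest = r_hand_cam - r_chest_cam
+    # Hand in the user's shoulder frame.
+    r_shoulder = r_chest - r_shoulder_in_chest
+    # Scale by the ratio of the arm lengths.
+    r_nao = (l_arm_nao / l_arm_user) * r_shoulder
+    # Express in the NAO torso frame.
+    return r_nao + r_shoulder_in_torso
+\end{verbatim}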
+
+Now that the desired positions of the NAO's hands are known, the appropriate
+joint motions need to be calculated by means of Cartesian control.
+
+\paragraph{Cartesian control}
+
+For this, a singularity-robust Cartesian controller was built. The outputs of
+our Cartesian controller are the four angles of the rotational joints of the
+shoulder and the elbow of each arm of the NAO robot, as described by the
+inverse kinematics formula
+
+$$\Delta\theta = J^{+}\Delta r$$
+
+where $J^{+}$ denotes a singularity-robust inverse of the Jacobian matrix.
+
+To build the Cartesian controller, the Jacobian matrix is needed first. The
+Jacobian matrix describes, to first order, how the end effector moves when
+each joint of the robot moves. There are two main ways to determine the
+Jacobian matrix. The first is the numerical method, where the approximation
+is obtained by perturbing each rotational joint by a small angle and
+observing how the end effector moves. Each column of the Jacobian matrix can
+then be approximated as follows:
+
+$$\frac{\partial r}{\partial\theta} \approx \frac{\Delta r}{\Delta\theta} =
+\left(
+  \begin{array}{ccc}
+    \frac{\Delta r_x}{\Delta\theta} &
+    \frac{\Delta r_y}{\Delta\theta} &
+    \frac{\Delta r_z}{\Delta\theta}
+  \end{array}
+\right)^{T}$$
+
+The other method is the analytical method, which was used in this project.
+Since only rotational joints are involved, each column of the Jacobian
+matrix, which is the tangent of the end effector's circular motion around the
+joint axis, can be calculated as the cross product between the rotation axis
+$e$ and the vector $r_{end}-r_{joint}$:
+
+$$
+\frac{\partial r_{end}}{\partial\theta_{joint}} =
+e \times (r_{end}-r_{joint})
+$$
+
+This gives one column of the Jacobian matrix; repeating the computation for
+each rotational joint fills the whole matrix.
+
+The next step for the Cartesian controller is to determine the inverse of the
+Jacobian matrix for the inverse kinematics. For this, singular value
+decomposition is used. Writing $J = U \Sigma V^{T}$, a singularity-robust
+inverse can be formed as
+
+$$J^{+} = V \Sigma^{+} U^{T}$$
+
+where $\Sigma^{+}$ inverts only the singular values that are sufficiently far
+from zero and sets the remaining ones to zero, which keeps the commanded
+joint motions bounded when the arm approaches a singular configuration.
+
+\section{System Integration}
+
+\section{Drawbacks and Conclusions}
+
+\end{document}