From fb04a3278806d37202522b93fcb6bc7e17c9dc9b Mon Sep 17 00:00:00 2001
From: Pavel Lutskov <pavel.lutskov@gmail.com>
Date: Fri, 1 Mar 2019 14:44:44 +0100
Subject: [PATCH] updated report, added references

---
 docs/references.bib |  47 ++++++++++
 docs/report.latex   | 211 ++++++++++++++++++++++++++++++++------------
 2 files changed, 201 insertions(+), 57 deletions(-)
 create mode 100644 docs/references.bib

diff --git a/docs/references.bib b/docs/references.bib
new file mode 100644
index 0000000..8ff4086
--- /dev/null
+++ b/docs/references.bib
@@ -0,0 +1,47 @@
+@article{aruco,
+    author = {Romero Ramirez, Francisco and Muñoz-Salinas,
+        Rafael and Medina-Carnicer, Rafael},
+    year = {2018},
+    month = {06},
+    title = {Speeded Up Detection of Squared Fiducial Markers},
+    volume = {76},
+    journal = {Image and Vision Computing},
+    doi = {10.1016/j.imavis.2018.05.004}
+}
+
+@misc{cam-toolbox,
+    author =  {Jean-Yves Bouguet},
+    year = {2015},
+    month = {10},
+    title = {Camera Calibration Toolbox for {M}atlab},
+    howpublished={\url{http://www.vision.caltech.edu/bouguetj/calib_doc/index.html}},
+    note={Accessed: 2019-03-01}
+}
+
+@misc{ros-speech,
+    author = {Oliveira, Jose Pedro and Perdigao, Fernando},
+    title = {{Speech Recog UC}},
+    year = {2018},
+    howpublished={\url{http://wiki.ros.org/speech_recog_uc}},
+    note={Accessed: 2019-03-01}
+}
+
+@misc{homography,
+    author = {Frank, Barbara and Stachniss, Cyrill and
+        Grisetti, Giorgio and Arras, Kai and Burgard, Wolfram},
+    title = {Robotics 2, Camera Calibration},
+    howpublished={\url{http://ais.informatik.uni-freiburg.de/teaching/ws09/robotics2/pdfs/rob2-08-camera-calibration.pdf}},
+    note={Accessed: 2019-03-01}
+}
+
+@misc{ros,
+    title={{ROS.org | Powering} the world's robots},
+    howpublished={\url{http://www.ros.org/}},
+    note={Accessed: 2019-01-03}
+}
+
+@misc{naoqi,
+    title={{NAOqi} Developer guide},
+    howpublished={\url{http://doc.aldebaran.com/2-1/index_dev_guide.html}},
+    note={Accessed: 2018-08-08}
+}
diff --git a/docs/report.latex b/docs/report.latex
index 4d4a92f..e8408d7 100644
--- a/docs/report.latex
+++ b/docs/report.latex
@@ -82,20 +82,41 @@ motions and the constant operator input, the robot is able to pick up an object
 of different shapes and sizes, applying different strategies when needed. We
 demonstrate the functioning of our system in the supporting video.
 
-We used ROS as a framework for our implementation. ROS is a well-established
-software for developing robot targeted applications with rich support
-infrastructure and modular approach to logic organization For interacting 
-with the robot we mainly relied on the NAOqi Python API. The advantage of using
-Python compared to C++ is a much higher speed of development and a more concise
-and readable resulting code.
+We used ROS \cite{ros} as a framework for our implementation. ROS is a
+well-established software for developing robot targeted applications with rich
+support infrastructure and modular approach to logic organization. For
+interacting with the robot we mainly relied on the NAOqi \cite{naoqi} Python
+API. The advantage of using Python compared to C++ is a much higher speed of
+development and a more concise and readable resulting code. We, therefore, used
+C++ only for the most computationally intensive parts of our program, such as
+the ArUco marker detection, because of the efficiency of the C++.
 
 \section{System Overview}
 
 \subsection{Vision}\label{ssec:vision}
 
-- Camera calibration
-- Aruco marker extraction
-- TF world coordinate publishing
+The foundational building block of our project is a computer vision system for
+detection of the position and the orientation of ArUco markers \cite{aruco}. In
+our implementation we follow closely the HRS Tutorial 4 and leverage the
+functionality of the ROS ArUco library. One major difference from the lecture,
+however, lies in finding the calibration matrix of the camera. In the tutorial
+we could retrieve the camera intrinsics of the NAO's camera through a call to
+the NAO API. In our case, however, a third-party webcam was used, the
+intrinsics of which we didn't know. In order to find the camera matrix, we used
+a common approach based on the calculation of a homography matrix through a
+search for correspondent points in a series of planar scenes \cite{homography}.
+In particular, we used three checkerboard patterns and the Camera Calibration
+Toolbox for Matlab \cite{cam-toolbox}. Our experiments confirmed that the
+positions and the orientations of the ArUco markers are calculated correctly,
+and therefore the calibration was correct.
+
+On the higher level, we extract the coordinates of the ArUco markers in the
+webcam frame, then apply a rotational transformation, so that the Z-coordinate
+of the markers correctly corresponds to the height \footnote{In the camera
+  frame the Z-coordinate is parallel to the camera axis.}. Finally, we publish
+the transforms of the markers with respect to the \verb|odom| frame
+\footnote{The choice of the parent frame is arbitrary as long as it is
+  consistent throughout the project.} using the ROS \verb|tf|.
 
 \begin{figure}
   \centerline{\includegraphics[width=0.8\linewidth]{figures/aruco.png}}
@@ -170,8 +191,6 @@ the \textit{NAO-meshes} package to create the 3D model of the NAO.
 
 \subsection{Navigation}\label{ssec:navigation}
 
-- Human Joystick (3dof)
-
 One of the two main feature in our robot is an intuitive navigation tool, which
 allows the robot to navigate the environment by tracking the user movements.
 
@@ -191,7 +210,7 @@ automatically through calibration.
   \centering
   \includegraphics[width=0.8\linewidth]{figures/usr_pt.png}
   \caption{User position tracking model}
-  \label{fig_user_tracking}
+  \label{fig:joystick}
 \end{figure}
 
 \subsection{Imitation}\label{ssec:imitation}
@@ -230,23 +249,24 @@ hand of the NAO robot in the frame of the robot's torso.
   \label{fig:coord-frames}
 \end{figure}
 
-After the ArUco markers are detected and published on ROS TF, as was described
-in \autoref{ssec:vision}, we have the three vectors $r_{aruco,chest}^{webcam}$,
-$r_{aruco,lefthand}^{webcam}$ and $r_{aruco,righthand}^{webcam}$. We describe
-the retargeting for one hand, since it is symmetrical for the other hand. We
-also assume that the user's coordinate systems have the same orientation, with
-the z-axis pointing upwards, the x-axis pointing straight into webcam and the
-y-axis to the left of the webcam \footnote{This assumption holds, because for
-  the imitation mode the user always faces the camera directly and stands
-  straight up. We need this assumption for robustness against the orientation
-  of the chest marker, since it can accidentally get tilted. If we would bind
-  the coordinate system to the chest marker completely, we would need to place
-  the marker on the chest firmly and carefully, which is time consuming.}.
-Therefore, we can directly calculate the hand position in the user chest frame
-by the means of the following equation:
+After the ArUco markers are detected and published on ROS \verb|tf|, as was
+described in \autoref{ssec:vision}, we have the three vectors
+$r_{aruco,chest}^{webcam}$, $r_{aruco,lefthand}^{webcam}$ and
+$r_{aruco,righthand}^{webcam}$. We describe the retargeting for one hand, since
+it is symmetrical for the other hand. We also assume that the user's coordinate
+systems have the same orientation, with the z-axis pointing upwards, the x-axis
+pointing straight into webcam and the y-axis to the left of the webcam
+\footnote{This assumption holds, because for the imitation mode the user always
+  faces the camera directly and stands straight up. We need this assumption for
+  robustness against the orientation of the chest marker, since it can
+  accidentally get tilted. If we would bind the coordinate system to the chest
+  marker completely, we would need to place the marker on the chest firmly and
+  carefully, which is time consuming.}. Therefore, we can directly calculate
+the hand position in the user chest frame by the means of the following
+equation:
 
 $$r_{hand,user}^{chest,user} = r_{aruco,hand}^{webcam} -
-r_{aruco,chest}^{webcam}$$.
+r_{aruco,chest}^{webcam}$$
 
 Next, we remap the hand coordinates in the chest frame into the user shoulder
 frame, using the following relation:
@@ -266,8 +286,8 @@ $$r_{hand,NAO}^{shoulder,NAO} =
 As before, we know the length of the user's arm through calibration and the
 length of the NAO's arm through the specification provided by the manufacturer.
 
-A final step of the posture retargeting is to obtain the coordinates of the
-end effector in the torso frame. This can be done through the following relation:
+A final step of the posture retargeting is to obtain the coordinates of the end
+effector in the torso frame. This can be done through the following relation:
 
 $$r_{hand,NAO}^{torso,NAO} =
 r_{hand,NAO}^{shoulder,NAO} + r_{shoulder,NAO}^{torso,NAO}$$
@@ -280,47 +300,121 @@ joint motions need to be calculated by the means of Cartesian control.
 
 \paragraph{Cartesian control}
 
-For this a singular robust cartesian controller was build.
+At first, we tried to employ the Cartesian controller that is shipped with the
+NAOqi SDK. We soon realized, however, that this controller was unsuitable for
+our task, because of the two significant limitations. The first problem with
+the NAO's controller is that it freezes, if the target is being updated too
+often: the arms of the robot start to stutter, and then make a final erratic
+motion once the program is terminated. However, arm teleoperation requires
+smoothness and therefore frequent updates of the target position, and the NAO
+controller didn't fit these requirements. A possible reason for such behavior
+is a bug in the implementation, and it might be possible that this problem was
+fixed in the later versions of the NAOqi SDK.
 
-The output of our cartesian controller are the 4 angles of the rotational
-joints for the shoulder and the elbow part of each arm of the NAO robot, which
-is described by the inverse kinematic formula
+Secondly, the controller of the NAO is not robust against
+\textit{singularities}. Singularities occur, when the kinematic chain loses one
+of the degrees of freedom, and so in order to reach a desired position, the
+joint motors must apply infinite torques. Practically, for the imitation task
+this would mean that once the robot has its arms fully stretched, the arms
+would execute violent erratic motions which would hurt the robot or cause it to
+lose balance. Therefore, we needed to implement our own Cartesian controller,
+which would allow us to operate the robot smoothly and don't worry about the
+singularities.
 
-$$\Delta\theta = J^{-1,robust}\Delta r$$
+In our case, the output of the Cartesian controller are the 4 angles of the
+rotational joints for the shoulder and the elbow part of each arm of the NAO
+robot. The angle speeds for the joints can be calculated using the following
+formula:
 
-To build the cartesian controller first the Jacobian matrix is needed. The
-content of the Jacobian matrix describes an approximation for the movement of
-each joint of the robot. There are 2 main ways to determine the Jacobian
-matrix. The first way is the numerical method, where this approximation is done
-by checking how the end effector moves with small angles for rotational joints.
-For this we can approximate each column of the Jacobian Matrix as followed:
+$$\dot{\theta} = J^{-1}\dot{r}$$
 
-$$\frac{\partial r}{\partial\theta} \sim \frac{\Delta r}{\Delta\theta} =
+In this formula $\dot{r}$ denotes the 3D speed of the target, which is the
+result of the posture retargeting, namely $r_{hand,NAO}^{torso,NAO}$. $J$ is
+the Jacobian matrix. The Jacobian matrix gives the relationship between
+the joint angle speed and the resulting speed of the effector
+on the end of the kinematic chain which the Jacobian matrix describes.
+
+We now apply a common simplification and state that
+
+$$\Delta \theta \approx J^{-1}\Delta r$$
+
+Here $\Delta$ is a small change in angle or the position. We use
+
+$$\Delta r = \frac{r_{desired} - r_{current}}{K},\ K = 10$$
+
+This means that we want the $r$ to make a small movement in the
+direction of the desired position.
+
+Now we need to calculate a Jacobian matrix. There are 2 main ways to determine
+the Jacobian matrix. The first way is the numerical method, where this
+approximation is done by checking how the end effector moves with small joint
+angle changes. For this we can approximate each column of the Jacobian Matrix
+as follows:
+
+$$
+J_j = \frac{\partial r}{\partial\theta_j} \approx
+\frac{\Delta r}{\Delta\theta_j} =
 \left(
     \begin{array}{ccc}
-      \frac{\Delta r_x}{\Delta\theta} &
-      \frac{\Delta r}{\Delta\theta} &
-      \frac{\Delta r}{\Delta\theta}
+      \frac{\Delta r_x}{\Delta\theta_j} &
+      \frac{\Delta r_y}{\Delta\theta_j} &
+      \frac{\Delta r_z}{\Delta\theta_j}
     \end{array}
-  \right)^{T}$$
+  \right)^{T}
+  $$
 
-The other method is the analytical method, which was used in this project.
-Since only rotational joints were available, the approximation for the
-Jacobian matrix, which is the tangent in rotational joints, can be calculated
-using the cross product between the rotational axis $e$ and the rotational
-vector \\ $r_{end}-r_{joint}$.
+We tested this approach, the results, however, were rather unstable, and due to
+the lack of time we didn't investigate the possible ways to make this approach
+perform better. A possible reason for bad performance of this method could be
+the imprecise readings from the NAO's joint sensors and the imprecise
+calculation of the position of the end effector, also performed by the NAO
+internally.
+
+The other method that we employed was to calculate the Jacobian matrix
+analytically. Since only rotational joints were available, the approximation
+for the Jacobian matrix, which is the tangent in rotational joints, can be
+calculated using the cross product between the rotational axis of a joint,
+denoted by $e_j$, and the rotational vector \\ $r_{end}-r_{j}$, where $r_{end}$
+is the position of the end effector (i.e.\ hand) and $r_{j}$ is the position of
+the joint. The following relation gives us one column of the Jacobian matrix.
+We can get the rotational axis of a joint and the position of the joint in the
+torso frame through NAOqi API.
 
 $$
-\frac{\partial r_{end}}{\partial\theta _{joint}} =
-(e \times (r_{end}-r_{joint}))
+J_j = \frac{\partial r_{end}}{\partial\theta_j} =
+(e \times (r_{end}-r_j))
 $$
 
-which gives us one column of the Jacobian matrix. This can be repeated for
-each rotational joint until the whole matrix is filled.
+This can be repeated for each rotational joint until the whole matrix is
+filled.
 
 The next step for the Cartesian controller is to determine the inverse Jacobian
 matrix for the inverse kinematic. For this singular value decomposition is
-used.
+used, which is given by
+
+$$J = U\Sigma V^T$$
+
+Then, the inverse can be calculated by
+
+$$J^{-1} = V \Sigma^{-1} U^T$$
+
+One advantage of this approach is that it can be employed to find a
+pseudoinverse of a non-square matrix. Furthermore, the diagonal matrix $\Sigma$
+has the $J$'s singular values in its main diagonal. If any of the singular
+values are close to zero, this means that the $J$ has lost rank and therefore
+the singularity occurs. We can calculate
+
+$$\Sigma^{-1} = (\frac{1}{\Sigma})^T$$
+
+Then we can avoid the singularity behavior by setting to $0$ the entries in
+$\Sigma^{-1}$ that are above a threshold value $\tau = 50$, which we determined
+through experimentation.
+
+Our test have shown, that our controller doesn't have the freezing behavior,
+which is present in the NAO's own controller, and therefore the target of
+the control can be updated with arbitrary frequency. Furthermore, our controller
+shows no signs of producing violent arm motions, which means that our strategy
+for handling singularities was effective.
 
 \section{System Implementation and Integration}
 
@@ -408,10 +502,10 @@ task. We implemented an easy to use prototype of a teleoperation system, which
 is fairly robust to the environmental conditions. Furthermore, we researched
 several approaches to the implementation of the Cartesian control, and were
 able to create a Cartesian controller, which is superior to the NAO's built-in
-one. Finally, we extensively used ROS and so can confidently employ ROS in the
+one. Finally, we extensively used ROS and now can confidently employ ROS in the
 future projects.
 
-Our resulting system has a few drawbacks, however, and there is a room for
+Our resulting system has a few drawbacks, however, and there is room for
 future improvements. Some of these drawbacks are due to the time constraints,
 the other ones have to do with the limitations of NAO itself. The first major
 drawback is the reliance on the NAO's built-in speech recognition for
@@ -459,6 +553,9 @@ our experiments. Due to the time constraints, we weren't able to investigate
 any approaches to make the walking more stable. This, however, can be an
 interesting topic for future semester projects.
 
+\bibliography{references}{}
+\bibliographystyle{IEEEtran}
+
 % \begin{table}[htbp]
 % \caption{Table Type Styles}
 % \begin{center}