Source code for pyb_utils.camera

"""This module provides a utility ``Camera`` class for PyBullet."""
from pathlib import Path

import numpy as np
import pybullet as pyb
from PIL import Image
import cv2



[docs]
class Camera:
    """A PyBullet camera, which captures images of the simulation environment.

    Parameters
    ----------
    view_matrix : iterable
        The camera's view matrix.
    near : float
        Near value
    far : float
        Far value
    fov : float
        Field of view
    width : int
        Width of the image in pixels
    height : int
        Height of the image in pixels
    """

    def __init__(
        self,
        view_matrix,
        near=0.1,
        far=1000.0,
        fov=60.0,
        width=1280,
        height=720,
    ):
        self.near = near
        self.far = far
        self.width = width
        self.height = height

        self.view_matrix = view_matrix
        self.proj_matrix = pyb.computeProjectionMatrixFOV(
            fov=fov, aspect=width / height, nearVal=near, farVal=far
        )


[docs]
    @classmethod
    def from_camera_position(
        cls,
        target_position,
        camera_position,
        near=0.1,
        far=1000.0,
        fov=60.0,
        width=1280,
        height=720,
    ):
        """Construct a new camera from target and camera positions.

        Parameters
        ----------
        target_position : iterable
            The position of the camera's target point.
        camera_position : iterable
            The position of the camera in the world.
        near : float
            Near value
        far : float
            Far value
        fov : float
            Field of view
        width : int
            Width of the image in pixels
        height : int
            Height of the image in pixels
        """
        view_matrix = pyb.computeViewMatrix(
            cameraEyePosition=camera_position,
            cameraTargetPosition=target_position,
            cameraUpVector=[0, 0, 1],
        )
        return cls(
            view_matrix=view_matrix,
            near=near,
            far=far,
            fov=fov,
            width=width,
            height=height,
        )



[docs]
    @classmethod
    def from_distance_rpy(
        cls,
        target_position,
        distance,
        roll=0,
        pitch=0,
        yaw=0,
        near=0.1,
        far=1000.0,
        fov=60.0,
        width=1280,
        height=720,
    ):
        """Construct a new camera from target position, distance, and roll,
        pitch, yaw angles.

        Parameters
        ----------
        target_position : iterable
            The position of the camera's target point.
        distance : float
            The distance of camera from target.
        roll : float
            Roll of the camera's orientation.
        pitch : float
            Pitch of the camera's orientation.
        yaw : float
            Yaw of the camera's orientation.
        near : float
            Near value
        far : float
            Far value
        fov : float
            Field of view
        width : int
            Width of the image in pixels
        height : int
            Height of the image in pixels
        """
        view_matrix = pyb.computeViewMatrixFromYawPitchRoll(
            distance=distance,
            yaw=yaw,
            pitch=pitch,
            roll=roll,
            cameraTargetPosition=target_position,
            upAxisIndex=2,
        )
        return cls(
            view_matrix=view_matrix,
            near=near,
            far=far,
            fov=fov,
            width=width,
            height=height,
        )



[docs]
    def get_frame(self):
        """Get a frame from the camera.

        Returns
        -------
        :
            A tuple ``(rgba, depth, seg)``, where ``rgba`` is the RGBA color
            data of shape ``(height, width, 4)``, ``depth`` is the depth buffer
            of shape ``(height, width)``, and ``seg`` is the segmentation mask
            of shape ``(height, width)``.
        """
        _, _, rgba, depth, seg = pyb.getCameraImage(
            width=self.width,
            height=self.height,
            shadow=1,
            viewMatrix=self.view_matrix,
            projectionMatrix=self.proj_matrix,
            renderer=pyb.ER_BULLET_HARDWARE_OPENGL,
        )

        # the image has height rows x width columns
        return (
            np.array(rgba, dtype=np.uint8).reshape(
                (self.height, self.width, 4)
            ),
            np.array(depth).reshape((self.height, self.width)),
            np.array(seg).reshape((self.height, self.width)),
        )



[docs]
    def save_frame(self, filename, rgba=None):
        """Save a frame to a file.

        Parameters
        ----------
        filename : str
            The name of the image file.
        rgba : np.ndarray
            Optionally, the user can provide the RGBA data from a previous call
            to :meth:`Camera.get_frame()`. If not provided,
            ``self.get_frame()`` is called to retrieve this data.
        """
        if rgba is None:
            rgba, _, _ = self.get_frame()
        Image.fromarray(rgba, "RGBA").save(filename)



[docs]
    def linearize_depth(self, depth=None):
        """Convert depth map to actual distance from camera plane.

        See https://stackoverflow.com/a/6657284.

        Parameters
        ----------
        depth : np.ndarray
            Optionally, the user can provide the depth buffer from a previous
            call to :meth:`Camera.get_frame()`. If not provided,
            ``self.get_frame()`` is called to retrieve this data.

        Returns
        -------
        :
            Linearized depth buffer, consisting of the actual depth values from
            the camera plane.
        """
        if depth is None:
            _, depth, _ = self.get_frame()
        depth_ndc = 2 * depth - 1  # normalized device coordinates
        depth_linear = (
            2.0
            * self.near
            * self.far
            / (self.far + self.near - depth_ndc * (self.far - self.near))
        )
        return depth_linear



[docs]
    def set_camera_pose(self, position, target):
        """Change the position and target of the camera.

        Parameters
        ----------
        position : iterable
            The position of the camera in the world.
        target : iterable
            The position of the camera's target point.
        """
        self.position = position
        self.target = target
        self.view_matrix = pyb.computeViewMatrix(
            cameraEyePosition=position,
            cameraTargetPosition=target,
            cameraUpVector=[0, 0, 1],
        )



[docs]
    def get_point_cloud(self, depth=None):
        """Convert depth buffer to 3D point cloud in world coordinates.

        See https://stackoverflow.com/a/62247245 for the main source of this
        code.

        Parameters
        ----------
        depth : np.ndarray
            Optionally, the user can provide the depth buffer from a previous
            call to :meth:`Camera.get_frame()`. If not provided,
            ``self.get_frame()`` is called to retrieve this data.

        Returns
        -------
        An array of shape ``(height, width, 3)`` representing the points seen
        by the camera.
        """
        if depth is None:
            _, depth, _ = self.get_frame()

        # view matrix maps world coordinates to camera coordinates (extrinsics)
        V = np.array(self.view_matrix).reshape((4, 4), order="F")

        # camera projection matrix: map camera coordinates to clip coordinates
        # (intrinsics)
        P = np.array(self.proj_matrix).reshape((4, 4), order="F")

        PV_inv = np.linalg.inv(P @ V)

        points = np.zeros((self.height, self.width, 3))
        for h in range(self.height):
            for w in range(self.width):
                # convert to normalized device coordinates
                # notice that the y-transform is negative---we actually have a
                # left-handed coordinate frame here (x = right, y = down, z =
                # out of the screen)
                x = (2 * w - self.width) / self.width
                y = -(2 * h - self.height) / self.height

                # depth buffer is already in range [0, 1]
                z = 2 * depth[h, w] - 1

                # back out to world coordinates by applying inverted projection
                # and view matrices
                r_ndc = np.array([x, y, z, 1])
                r_world_unnormalized = PV_inv @ r_ndc

                # normalize homogenous coordinates to get rid of perspective
                # divide
                points[h, w, :] = (
                    r_world_unnormalized[:3] / r_world_unnormalized[3]
                )
        return points





[docs]
class VideoRecorder:
    """Recorder for a video of a PyBullet simulation.

    Parameters
    ----------
    filename : str
        The name of the file to write the video to.
    camera : Camera
        Camera object to use for rendering frames.
    fps : int
        Frames per second. Each ``fps`` frames will be played over one second
        of video.
    """

    def __init__(self, filename, camera, fps, codec="mp4v"):
        self.camera = camera
        fourcc = cv2.VideoWriter_fourcc(*codec)
        self.writer = cv2.VideoWriter(
            str(filename),
            fourcc,
            fps,
            (camera.width, camera.height),
        )


[docs]
    def capture_frame(self, rgba=None):
        """Capture a frame and write it to the video.

        Parameters
        ----------
        rgba : np.ndarray
            Optionally, the user can provide the RGBA data from a previous call
            to :meth:`Camera.get_frame()`. This can be used to avoid multiple
            renderings with the camera. If not provided,
            ``self.camera.get_frame()`` is called to retrieve this data.
        """
        if rgba is None:
            rgba, _, _ = self.camera.get_frame()

        # OpenCV uses BGR instead of RGB
        bgr = rgba[..., [2, 1, 0]]
        self.writer.write(bgr)





[docs]
class FrameRecorder:
    """Recorder for a set of frames from a PyBullet simulation.

    This is an alternative to the :class:`VideoRecorder`, which directly saves
    a video.

    Parameters
    ----------
    camera : Camera
        Camera object to use for rendering frames.
    fps : int
        Frames per second. Each ``fps`` frames will be played over one second
        of video.
    dirname : str
        The name of the directory to write the frames to. The directory will be
        created if it doesn't already exist.
    """
    def __init__(self, camera, fps=25, dirname=None):
        self.camera = camera
        self.timestep = 1.0 / fps
        self.save = not (dirname is None)

        if self.save:
            self.path = Path(dirname)
            self.path.mkdir(exist_ok=True)

        self.frame_count = 0
        self.last_record_time = -np.inf


[docs]
    def capture_frame(self, t, rgba=None):
        """Capture and save a frame if enough time has past since the last
        capture.

        This does nothing if less than ``1. / fps`` time has elapsed since the
        last frame was captured.

        Parameters
        ----------
        t : float
            The current time.
        rgba :
            Image data as returned by :meth:`Camera.get_frame`. Otherwise,
            ``self.camera.get_frame`` is called to retrieve this data.
        """
        if not self.save:
            return

        # bail if not enough time has elapsed to record a new frame
        if t < self.last_record_time + self.timestep:
            return

        # capture and save the frame
        frame_path = self.path / f"frame_{self.frame_count}.png"
        self.camera.save_frame(frame_path, rgba=rgba)

        self.frame_count += 1
        self.last_record_time = t