| // Copyright 2017 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Private API for receiving real-time media perception information. |
| [platforms=("chromeos")] |
| namespace mediaPerceptionPrivate { |
  // The overall state of the media analytics process and its media
  // processing pipeline, as reported by $(ref:getState) / $(ref:setState).
  enum Status {
    // The media analytics process is waiting to be launched.
    UNINITIALIZED,

    // The analytics process is running and the media processing pipeline is
    // started, but it is not yet receiving image frames. This is a
    // transitional state between <code>SUSPENDED</code> and
    // <code>RUNNING</code> for the time it takes to warm up the media
    // processing pipeline, which can take anywhere from a few seconds to a
    // minute.
    // Note: <code>STARTED</code> is the initial reply to SetState
    // <code>RUNNING</code>.
    STARTED,

    // The analytics process is running and the media processing pipeline is
    // ingesting image frames. At this point, MediaPerception signals should
    // be coming over D-Bus.
    RUNNING,

    // Analytics process is running and the media processing pipeline is ready
    // to be set to state <code>RUNNING</code>. The D-Bus communications
    // are enabled but the media processing pipeline is suspended.
    SUSPENDED,

    // Enum for restarting the media analytics process using Upstart.
    // Calling setState <code>RESTARTING</code> will restart the media process
    // to the <code>SUSPENDED</code> state. The app has to set the state to
    // <code>RUNNING</code> in order to start receiving media perception
    // information again.
    RESTARTING,

    // Stops the media analytics process via Upstart.
    STOPPED,

    // Indicates that a ServiceError has occurred.
    SERVICE_ERROR
  };
| |
  // Error conditions reported back to the caller when an API call fails
  // (carried in State, ProcessState and Diagnostics return values).
  enum ServiceError {
    // The media analytics process could not be reached. This is likely due to
    // a faulty comms configuration or that the process crashed.
    SERVICE_UNREACHABLE,

    // The media analytics process is not running. The MPP API knows that the
    // process has not been started yet.
    SERVICE_NOT_RUNNING,

    // The media analytics process is busy launching. Wait for setState
    // <code>RUNNING</code> or setState <code>RESTARTING</code> callback.
    SERVICE_BUSY_LAUNCHING,

    // The component is not installed properly.
    SERVICE_NOT_INSTALLED,

    // Failed to establish a Mojo connection to the service.
    MOJO_CONNECTION_FAILURE
  };
| |
  // Media perception features that can be enabled via the
  // <code>features</code> field of $(ref:setState).
  enum Feature {
    AUTOZOOM,
    HOTWORD_DETECTION,
    OCCUPANCY_DETECTION,
    EDGE_EMBEDDINGS,
    SOFTWARE_CROPPING
  };
| |
  // A named parameter substituted into the media analytics configuration at
  // start-up (see <code>State.namedTemplateArguments</code>).
  dictionary NamedTemplateArgument {
    // The name of the template parameter to substitute.
    DOMString? name;

    // The substituted value; may be either a string or a number.
    (DOMString or double)? value;
  };
| |
  // The flavor of the downloadable media analytics component to install
  // (see $(ref:setAnalyticsComponent)).
  enum ComponentType {
    // The smaller component with limited functionality (smaller size and
    // limited models).
    LIGHT,
    // The fully-featured component with more functionality (larger size and
    // more models).
    FULL
  };
| |
  // The status of the media analytics process component on the device.
  enum ComponentStatus {
    // The installation state of the component has not been determined.
    UNKNOWN,
    // The component is successfully installed and the image is mounted.
    INSTALLED,
    // The component failed to download, install or load.
    FAILED_TO_INSTALL
  };
| |
  // Error code associated with a failure to install the media analytics
  // component.
  enum ComponentInstallationError {
    // Component requested does not exist.
    UNKNOWN_COMPONENT,

    // The update engine failed to install the component.
    INSTALL_FAILURE,

    // Component cannot be mounted.
    MOUNT_FAILURE,

    // The component is not compatible with the device.
    COMPATIBILITY_CHECK_FAILED,

    // The component was not found - reported for load requests with kSkip
    // update policy.
    NOT_FOUND
  };
| |
  // Identifies which media analytics component to install and load
  // (argument to $(ref:setAnalyticsComponent)).
  dictionary Component {
    ComponentType type;
  };
| |
  // The state of the media analytics downloadable component.
  dictionary ComponentState {
    // The current installation status of the component.
    ComponentStatus status;

    // The version string for the current component.
    DOMString? version;

    // If the component installation failed, the encountered installation
    // error. Not set if the component installation succeeded.
    ComponentInstallationError? installationErrorCode;
  };
| |
  // ------------------- Start of process management definitions. ------------
  // New interface for managing the process state of the media perception
  // service with the intention of eventually phasing out the setState() call.
  enum ProcessStatus {
    // The component process state is unknown, for example, if the process is
    // waiting to be launched. This is the initial state before
    // $(ref:setComponentProcessState) is first called.
    UNKNOWN,

    // The component process has been started.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STOPPED</code> or
    // <code>UNKNOWN</code>.
    STARTED,

    // The component process has been stopped.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STARTED</code>.
    // Note: the process is automatically stopped when the Chrome process
    // is closed.
    STOPPED,

    // Indicates that a ServiceError has occurred.
    SERVICE_ERROR
  };
| |
  // Argument to and return value of $(ref:setComponentProcessState).
  dictionary ProcessState {
    // The desired (on input) or resulting (on output) process status.
    ProcessStatus? status;

    // Return parameter for $(ref:setComponentProcessState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;
  };
| // ------------------- End of process management definitions. -------------- |
| |
  // The parameters for processing a particular video stream.
  dictionary VideoStreamParam {
    // Identifies the video stream described by these parameters.
    DOMString? id;

    // Frame width in pixels.
    long? width;

    // Frame height in pixels.
    long? height;

    // The frame rate at which this video stream would be processed.
    long? frameRate;
  };
| |
  // A 2D point within an image frame, measured from the top left corner.
  dictionary Point {
    // The horizontal distance from the top left corner of the image.
    double? x;

    // The vertical distance from the top left corner of the image.
    double? y;
  };
| |
  // The parameters for a whiteboard in the image frame. Corners are given in
  // pixel coordinates normalized to the size of the image frame (i.e. in the
  // range [(0.0, 0.0), (1.0, 1.0)]). The aspectRatio is the physical aspect
  // ratio of the whiteboard (e.g. for a 1m high and 2m wide whiteboard, the
  // aspect ratio would be 2).
  dictionary Whiteboard {
    // The top left corner of the whiteboard in the image frame.
    Point? topLeft;

    // The top right corner of the whiteboard in the image frame.
    Point? topRight;

    // The bottom left corner of the whiteboard in the image frame.
    Point? bottomLeft;

    // The bottom right corner of the whiteboard in the image frame.
    Point? bottomRight;

    // The physical aspect ratio of the whiteboard.
    double? aspectRatio;
  };
| |
  // The system and configuration state of the analytics process.
  dictionary State {
    // The current (or desired, when passed to $(ref:setState)) status.
    Status status;

    // Optional $(ref:setState) parameter. Specifies the video device the media
    // analytics process should open while the media processing pipeline is
    // starting. To set this parameter, status has to be <code>RUNNING</code>.
    DOMString? deviceContext;

    // Return parameter for $(ref:setState) or $(ref:getState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;

    // A list of video streams processed by the analytics process. To set this
    // parameter, status has to be <code>RUNNING</code>.
    VideoStreamParam[]? videoStreamParam;

    // Media analytics configuration. It can only be used when setting state to
    // RUNNING.
    DOMString? configuration;

    // Corners and aspect ratio of the whiteboard in the image frame. Should
    // only be set when setting state to <code>RUNNING</code> and configuration
    // to whiteboard.
    Whiteboard? whiteboard;

    // A list of enabled media perception features.
    Feature[]? features;

    // A list of named parameters to be substituted at start-up. Will
    // only have effect when setting state to <code>RUNNING</code>.
    NamedTemplateArgument[]? namedTemplateArguments;
  };
| |
  // An axis-aligned rectangle within an image frame.
  dictionary BoundingBox {
    // Specifies whether the points are normalized to the size of the image.
    boolean? normalized;

    // The two points that define the corners of a bounding box.
    Point? topLeft;
    Point? bottomRight;
  };
| |
  // Units in which a Distance magnitude is expressed.
  enum DistanceUnits {
    UNSPECIFIED,
    METERS,
    PIXELS
  };
| |
  // Generic dictionary to encapsulate a distance magnitude and units.
  dictionary Distance {
    // This field provides flexibility to report depths or distances of
    // different entity types with different units.
    DistanceUnits? units;

    // The magnitude of the distance, expressed in <code>units</code>.
    double? magnitude;
  };
| |
  // The kind of entity detected in a video frame.
  enum EntityType {
    UNSPECIFIED,
    FACE,
    PERSON,
    MOTION_REGION,
    LABELED_REGION
  };
| |
  // The type of frame perception that was run on an image frame.
  enum FramePerceptionType {
    UNKNOWN_TYPE,
    FACE_DETECTION,
    PERSON_DETECTION,
    MOTION_DETECTION
  };
| |
  // A single entity detected in a video frame.
  dictionary Entity {
    // A unique id associated with the detected entity, which can be used to
    // track the entity over time.
    long? id;

    // The kind of entity this detection represents.
    EntityType? type;

    // Label for this entity.
    DOMString? entityLabel;

    // Minimum box which captures entire detected entity.
    BoundingBox? boundingBox;

    // A value for the quality of this detection.
    double? confidence;

    // The estimated depth of the entity from the camera.
    Distance? depth;
  };
| |
  // Processing latency measurement for a single packet in the pipeline.
  dictionary PacketLatency {
    // Label for this packet.
    DOMString? packetLabel;

    // Packet processing latency in microseconds.
    long? latencyUsec;
  };
| |
  // Type of lighting conditions.
  enum LightCondition {
    UNSPECIFIED,

    // No noticeable change occurred.
    NO_CHANGE,

    // Light was switched on in the room.
    TURNED_ON,

    // Light was switched off in the room.
    TURNED_OFF,

    // Light gradually got dimmer (for example, due to a sunset).
    DIMMER,

    // Light gradually got brighter (for example, due to a sunrise).
    BRIGHTER,

    // Black frame was detected - the current frame contains only noise.
    BLACK_FRAME
  };
| |
  // Detection of human presence close to the camera.
  dictionary VideoHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present in
    // the video frame.
    double? humanPresenceLikelihood;

    // Indicates a probability in [0, 1] that motion has been detected in the
    // video frame.
    double? motionDetectedLikelihood;

    // Indicates lighting condition in the video frame.
    LightCondition? lightCondition;

    // Indicates a probability in [0, 1] interval that
    // <code>lightCondition</code> value is correct.
    double? lightConditionLikelihood;
  };
| |
  // The set of computer vision metadata for an image frame.
  dictionary FramePerception {
    // Identifier of the frame these results correspond to.
    long? frameId;

    // Dimensions of the analyzed frame in pixels.
    long? frameWidthInPx;
    long? frameHeightInPx;

    // The timestamp associated with the frame (when it is received by the
    // analytics process).
    double? timestamp;

    // The list of entities detected in this frame.
    Entity[]? entities;

    // Processing latency for a list of packets.
    PacketLatency[]? packetLatency;

    // Human presence detection results for a video frame.
    VideoHumanPresenceDetection? videoHumanPresenceDetection;

    // Indicates what types of frame perception were run.
    FramePerceptionType[]? framePerceptionTypes;
  };
| |
  // An estimate of the direction that the sound is coming from.
  dictionary AudioLocalization {
    // An angle in radians in the horizontal plane. It roughly points to the
    // peak in the probability distribution of azimuth defined below.
    double? azimuthRadians;

    // A probability distribution for the current snapshot in time that shows
    // the likelihood of a sound source being at a particular azimuth. For
    // example, <code>azimuthScores = [0.1, 0.2, 0.3, 0.4]</code> means that
    // the probability that the sound is coming from an azimuth of 0, pi/2, pi,
    // 3*pi/2 is 0.1, 0.2, 0.3 and 0.4, respectively.
    double[]? azimuthScores;
  };
| |
  // Spectrogram of an audio frame.
  dictionary AudioSpectrogram {
    // The spectrogram values for the frame.
    double[]? values;
  };
| |
  // Detection of human presence close to the microphone.
  dictionary AudioHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human has caused a
    // sound close to the microphone.
    double? humanPresenceLikelihood;

    // Estimate of the noise spectrogram.
    AudioSpectrogram? noiseSpectrogram;

    // Spectrogram of an audio frame.
    AudioSpectrogram? frameSpectrogram;
  };
| |
  // The type of hotword that was detected.
  enum HotwordType {
    UNKNOWN_TYPE,
    OK_GOOGLE
  };
| |
  // A hotword detected in the audio stream.
  dictionary Hotword {
    // Unique identifier for the hotword instance. Note that a single hotword
    // instance can span more than one audio frame. In that case a single
    // hotword instance can be reported in multiple Hotword or HotwordDetection
    // results. Hotword results associated with the same hotword instance will
    // have the same <code>id</code>.
    long? id;

    // Indicates the type of this hotword.
    HotwordType? type;

    // Id of the audio frame in which the hotword was detected.
    long? frameId;

    // Indicates the start time of this hotword in the audio frame.
    long? startTimestampMs;

    // Indicates the end time of this hotword in the audio frame.
    long? endTimestampMs;

    // Indicates a probability in [0, 1] interval that this hotword is present
    // in the audio frame.
    double? confidence;
  };
| |
  // Detection of hotword in the audio stream.
  dictionary HotwordDetection {
    // All hotwords detected in the current audio frame.
    Hotword[]? hotwords;
  };
| |
  // Audio perception results for an audio frame.
  dictionary AudioPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Audio localization results for an audio frame.
    AudioLocalization? audioLocalization;

    // Audio human presence detection results for an audio frame.
    AudioHumanPresenceDetection? audioHumanPresenceDetection;

    // Hotword detection results.
    HotwordDetection? hotwordDetection;
  };
| |
  // Detection of human presence based on both audio and video inputs.
  dictionary AudioVisualHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present.
    double? humanPresenceLikelihood;
  };
| |
  // Perception results based on both audio and video inputs.
  dictionary AudioVisualPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Human presence detection results.
    AudioVisualHumanPresenceDetection? audioVisualHumanPresenceDetection;
  };
| |
  // Stores metadata such as version of media perception features.
  dictionary Metadata {
    // Version of the visual experience controller, if available.
    DOMString? visualExperienceControllerVersion;
  };
| |
  // A bundle of perception results emitted by the media processing pipeline;
  // delivered to clients via the $(ref:onMediaPerception) event.
  dictionary MediaPerception {
    // The time the media perception data was emitted by the media processing
    // pipeline. This value will be greater than the timestamp stored within
    // the FramePerception dictionary and the difference between them can be
    // viewed as the processing time for a single frame.
    double? timestamp;

    // An array of framePerceptions.
    FramePerception[]? framePerceptions;

    // An array of audio perceptions.
    AudioPerception[]? audioPerceptions;

    // An array of audio-visual perceptions.
    AudioVisualPerception[]? audioVisualPerceptions;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
| |
  // Encoding of the bytes stored in an ImageFrame.
  enum ImageFormat {
    // Image represented by RGB data channels.
    RAW,
    PNG,
    JPEG
  };
| |
  // A single image frame captured from a video stream.
  dictionary ImageFrame {
    // Frame dimensions in pixels.
    long? width;
    long? height;

    // Encoding of the bytes in <code>frame</code>.
    ImageFormat? format;

    // Length in bytes of the frame data.
    long? dataLength;

    // The bytes of the image frame.
    ArrayBuffer? frame;
  };
| |
  // One diagnostic sample pairing perception results with the image frame
  // they were computed from (see $(ref:getDiagnostics)).
  dictionary PerceptionSample {
    // The video analytics FramePerception for the associated image frame
    // data.
    FramePerception? framePerception;

    // The image frame data for the associated FramePerception object.
    ImageFrame? imageFrame;

    // The audio perception results for an audio frame.
    AudioPerception? audioPerception;

    // Perception results based on both audio and video inputs.
    AudioVisualPerception? audioVisualPerception;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
| |
  // Diagnostics information returned by $(ref:getDiagnostics).
  dictionary Diagnostics {
    // Return parameter for $(ref:getDiagnostics) that specifies the error
    // type for failure cases.
    ServiceError? serviceError;

    // A buffer of image frames and the associated video analytics information
    // that can be used to diagnose a malfunction.
    PerceptionSample[]? perceptionSamples;
  };
| |
  // Invoked with the current (or newly set) State of the system.
  callback StateCallback = void(State state);

  // Invoked with the diagnostics buffer from the analytics process.
  callback DiagnosticsCallback = void(Diagnostics diagnostics);

  // Invoked with the state of the analytics component after an
  // install/load attempt.
  callback ComponentStateCallback = void(ComponentState componentState);

  // Invoked with the state of the component process after a state change
  // request.
  callback ProcessStateCallback = void(ProcessState processState);
| |
  interface Functions {
    // Gets the status of the media perception process.
    // |callback| : The current state of the system.
    static void getState(StateCallback callback);

    // Sets the desired state of the system.
    // |state| : A dictionary with the desired new state. The only settable
    // states are <code>RUNNING</code>, <code>SUSPENDED</code>, and
    // <code>RESTARTING</code>.
    // |callback| : Invoked with the State of the system after setting it. Can
    // be used to verify the state was set as desired.
    static void setState(State state, StateCallback callback);

    // Gets a diagnostics buffer out of the video analytics process.
    // |callback| : Returns a Diagnostics dictionary object.
    static void getDiagnostics(DiagnosticsCallback callback);

    // Attempts to download and load the media analytics component. This
    // function should be called every time a client starts using this API. If
    // the component is already loaded, the callback will simply return that
    // information. The process must be <code>STOPPED</code> for this function
    // to succeed.
    // Note: If a different component type is desired, this function can
    // be called with the new desired type and the new component will be
    // downloaded and installed.
    // |component| : The desired component to install and load.
    // |callback| : Returns the state of the component.
    static void setAnalyticsComponent(
        Component component,
        ComponentStateCallback callback);

    // Manages the lifetime of the component process. This function should
    // only be used if the component is installed. It will fail if the
    // component is not installed.
    // |processState| : The desired state for the component process.
    // |callback| : Reports the new state of the process, which is expected to
    // be the same as the desired state, unless something goes wrong.
    static void setComponentProcessState(
        ProcessState processState, ProcessStateCallback callback);
  };
| |
  interface Events {
    // Fired when media perception information is received from the media
    // analytics process.
    // |mediaPerception| : The dictionary which contains a dump of everything
    // the analytics process has detected or determined from the incoming media
    // streams.
    static void onMediaPerception(MediaPerception mediaPerception);
  };
| }; |