<?xml version="1.0" encoding="utf-8"?>
<!-- 
     draft-rfcxml-general-template-standard-00
  
     This template includes examples of the most commonly used features of RFCXML with comments 
     explaining how to customise them. This template can be quickly turned into an I-D by editing 
     the examples provided. Look for [REPLACE], [REPLACE/DELETE], [CHECK] and edit accordingly.
     Note - 'DELETE' means delete the element or attribute, not just the contents.
     
     Documentation is at https://authors.ietf.org/en/templates-and-schemas
-->
<?xml-model href="rfc7991bis.rnc"?>  <!-- Required for schema validation and schema-aware editing -->
<!-- <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> -->
<!-- This third-party XSLT can be enabled for direct transformations in XML processors, including most browsers -->


<!DOCTYPE rfc [
  <!ENTITY nbsp    "&#160;">
  <!ENTITY zwsp   "&#8203;">
  <!ENTITY nbhy   "&#8209;">
  <!ENTITY wj     "&#8288;">
]>
<!-- If further character entities are required then they should be added to the DOCTYPE above.
     Use of an external entity file is not recommended. -->

<rfc
  xmlns:xi="http://www.w3.org/2001/XInclude"
  category="std"
  docName="draft-valin-opus-dred-00"
  ipr="trust200902"
  obsoletes=""
  updates="6716"
  submissionType="IETF"
  xml:lang="en"
  version="3">
<!-- [REPLACE] 
       * docName with name of your draft
     [CHECK] 
       * category should be one of std, bcp, info, exp, historic
       * ipr should be one of trust200902, noModificationTrust200902, noDerivativesTrust200902, pre5378Trust200902
       * updates can be an RFC number as NNNN
       * obsoletes can be an RFC number as NNNN 
-->

  <front>
    <title abbrev="Opus DRED">Deep Audio Redundancy (DRED) Extension for the Opus Codec</title>

    <seriesInfo name="Internet-Draft" value="draft-valin-opus-dred-00"/>
   
    <author fullname="Jean-Marc Valin" initials="JM" surname="Valin">
      <!-- [CHECK]
             * initials should not include an initial for the surname
             * role="editor" is optional -->
    <!-- Can have more than one author -->
      
    <!-- all of the following elements are optional -->
      <organization>Amazon</organization>
      <address>
        <postal>
          <country>CA</country>
          <!-- Uses two letter country code -->
        </postal>
        <email>jmvalin@amazon.com</email>  
      </address>
    </author>

    <author fullname="Jan Buethe" initials="J" surname="Buethe">
      <organization>Amazon</organization>
      <address>
        <postal>
          <country>DE</country>
          <!-- Uses two letter country code -->
        </postal>
        <email>jbuethe@amazon.com</email>  
      </address>
    </author>

    <date year="2023"/>
    <!-- On draft submission:
         * If only the current year is specified, the current day and month will be used.
         * If the month and year are both specified and are the current ones, the current day will
           be used
         * If the year is not the current one, it is necessary to specify at least a month and day="1" will be used.
    -->

    <area>Applications and Real-Time</area>
    <workgroup>Internet Engineering Task Force</workgroup>
    <!-- "Internet Engineering Task Force" is fine for individual submissions.  If this element is
          not present, the default is "Network Working Group", which is used by the RFC Editor as
          a nod to the history of the RFC Series. -->

    <keyword>Opus</keyword>
    <keyword>RFC6716</keyword>

    <abstract>
      <t>This document proposes a mechanism for embedding very low bitrate deep audio redundancy (DRED)
          within the Opus codec (RFC 6716) bitstream.</t>
    </abstract>

  </front>

  <middle>

    <section>
      <name>Introduction</name>
      <t>This document proposes a mechanism for embedding very low bitrate deep audio redundancy (DRED)
          within the Opus codec <xref target="RFC6716"/> bitstream.</t>

      <section>
        <name>Requirements Language</name>
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL",
          "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT
          RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
          interpreted as described in BCP 14 <xref target="RFC2119"/>
          <xref target="RFC8174"/> when, and only when, they appear in
          all capitals, as shown here.</t>
      </section>

    </section>

    <section>
      <name>DRED Extension Format</name>
      <t>We use the Opus extension mechanism <xref target="opus-extension"/> to add deep redundancy within the padding
          of an Opus packet. We use the extension ID 32, which means that the L flag signals whether a length code is included.
          In this document, we define only the extension payload. [Note: until adoption by the IETF, experimental
          implementations of DRED MUST use experimental extension ID 127 to avoid causing interoperability problems.]</t>

      <t>The principles behind the DRED mechanism defined in this extension are explained in <xref target="dred-paper"/>.
          All the data in the extension payload is encoded using the Opus entropy coder defined in Section&nbsp;4.1 of
          <xref target="RFC6716"/>. Since some of the fields at the beginning of the payload are encoded with flat
          binary probabilities, they can still be interpreted as bits.</t>

      <t>The extension starts with an offset indicator, encoded as a signed 5-bit integer (two's complement) in units of 2.5&nbsp;ms.
          The offset indicates the time of the last sample analysed for the transmitted features in the packet, measured
          from the time of the first sample in the Opus frame that contains the extension data.</t>

      <t>The offset is followed by a 4-bit initial quantizer field (Q0) ranging from 0 to 15. That quantizer is used on the most recent
          frame encoded and is followed by the 3-bit quantizer slope dQ. The 3-bit dQ index selects from the following values:
          [0, 1/8, 3/16, 1/4, 3/8, 1/2, 3/4, 1] quantizer step per frame. The quantizer for frame k is thus given by:
          min(15, round(Q0 + dQ_table[dQ] * k)). For example, using Q0=5 and dQ=2 (3/16), frame k=20 would
          use a quantizer of min(15, round(5 + 3/16 * 20)) = 9.</t>

      <t>The compressed redundancy information consists of an initial state coded with a pyramid vector quantizer (PVQ),
          followed by the entropy-coded latent representation. The number of 40-ms DRED blocks is not coded explicitly.
          Instead, the decoder MUST NOT decode blocks when fewer than 8 bits remain in the DRED payload.</t>

      <figure>
        <name>Extension framing</name>
        <artset>
        <!-- This <artset> contains a single <artwork> element of type "ascii-art" -->
          <artwork type="ascii-art" name="dred-framing.txt">
            <![CDATA[
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  Offset |  Q0   |  dQ |    PVQ                                |
   +-+-+-+-+-+-+-+-+-+-+-+-+                                       +
   :                                                               :
   |            ...                +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                               |  Latent coeffs                |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   :                                                               :
   |                                                               |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            ]]>
          </artwork>
        </artset>
      </figure>


    </section>

    <section anchor="IANA">
    <!-- All drafts are required to have an IANA considerations section. See RFC 8126 for a guide.-->
      <name>IANA Considerations</name>
      <t>This document assigns ID 32 in the "Opus Extension IDs" registry to the proposed DRED
          extension.</t>

    </section>
    
    <section anchor="Security">
      <!-- All drafts are required to have a security considerations section. See RFC 3552 for a guide. -->
      <name>Security Considerations</name>
      <t>As is the case for any media codec, the decoder must be robust against malicious payloads.
         Similarly, the encoder must also be robust to malicious audio input since the encoder input
         can often be controlled by an attacker. That can happen through browser JS, echo, or when
         the encoder is on a gateway.</t>
     
      <t>DRED is designed to have a complexity that is independent of the signal characteristics. However,
         there exist implementation details that can cause signal-dependent complexity changes. One example
         is the CPU treatment of denormals, which can sometimes cause increased CPU load and could be triggered
         by malicious input. For that reason, it is important to minimize such effects to reduce the impact
         of denial-of-service (DoS) attacks. Similarly, since the encoding and decoding process can be computationally costly,
         devices must manage the complexity to avoid attacks that could trigger too much DRED encoding or decoding
         to be performed.</t>
     
      <t>The use of variable-bitrate (VBR) encoding in DRED poses a theoretical information leak threat
         <xref target="RFC6562"/>, but that threat is believed to be significantly lower than that posed by
         VBR encoding in the main Opus payload. Since this document provides a way to dynamically vary the amount
         of redundancy transmitted, it is also possible to reduce the overall VBR risk of Opus by using DRED as
         a way of making the total Opus payload constant (CBR) or nearly constant.</t>
    </section>

  </middle>

  <back>
    <references>
      <name>References</name>
      <references>
        <name>Normative References</name>
        <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.2119.xml"/>
        <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.8174.xml"/>
        <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.6716.xml"/>
        <!-- The recommended and simplest way to include a well known reference -->
        <reference anchor="opus-extension">
        <!-- Manually added reference -->
          <front>
            <title>Extension Formatting for the Opus Codec (draft-valin-opus-extension)</title>
            <author initials="J.-M." surname="Valin" fullname="Jean-Marc Valin">
              <organization/>
            </author>
            <date year="2023" month="March"/>
            <abstract>
              <t>Opus extension format.
              </t>
            </abstract>
          </front>
        </reference>
      </references>
 
      <references>
        <name>Informative References</name>
        <xi:include href="https://www.rfc-editor.org/refs/bibxml/reference.RFC.6562.xml"/>       
        <reference anchor="dred-paper" target="https://arxiv.org/abs/2212.04453">
          <front>
            <title>Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder</title>
            <author initials="J.-M." surname="Valin"/>
            <author initials="J." surname="Buethe"/>
            <author initials="A." surname="Mustafa"/>
            <date year="2023"/>
          </front>
        </reference>

      </references>
    </references>
    
    <!--
    <section>
      <name>Appendix 1 [REPLACE/DELETE]</name>
      <t>This becomes an Appendix [REPLACE]</t>
    </section>

    <section anchor="Acknowledgements" numbered="false">
      <name>Acknowledgements</name>
      <t>We would like to thank...</t>
    </section>
    
    <section anchor="Contributors" numbered="false">
      <name>Contributors</name>
      <t>Thanks to all of the contributors. [REPLACE]</t>
      </section>
    -->
    
 </back>
</rfc>
