Home Reference Source Test

src/encoder/utf.mjs

'use strict';

/**
 * Convert a JavaScript (UTF-16) string into a "binary string" holding its
 * UTF-8 encoding, suitable as input for the hasher.
 *
 * Every character of the returned string has a char code in the range
 * 0x00-0xFF and represents exactly one UTF-8 byte.
 *
 * Valid surrogate pairs are combined into a single code point and encoded
 * as four bytes. Unpaired surrogates (a high surrogate that is not followed
 * by a low surrogate, or a low surrogate on its own) are encoded as U+FFFD
 * REPLACEMENT CHARACTER, the same way `TextEncoder` does, and never consume
 * the code unit that follows them.
 *
 * @param {string} message - Text to encode
 * @returns {string} Binary string with one character per UTF-8 byte
 */
export function fromUtf(message) {
  let raw = '';
  for (let i = 0, msgLen = message.length; i < msgLen; i++) {
    let codePoint = message.charCodeAt(i);

    // Resolve surrogates first so the encoding below only sees scalar values
    if (codePoint >= 0xd800 && codePoint < 0xe000) {
      const next = i + 1 < msgLen ? message.charCodeAt(i + 1) : 0;
      const isHigh = codePoint < 0xdc00;
      const nextIsLow = next >= 0xdc00 && next < 0xe000;
      if (isHigh && nextIsLow) {
        // UTF-16 encodes 0x10000-0x10FFFF by
        // subtracting 0x10000 and splitting the
        // 20 bits of 0x0-0xFFFFF into two halves
        codePoint = 0x10000 + (((codePoint & 0x3ff) << 10) | (next & 0x3ff));
        i++;
      }
      else {
        // Unpaired surrogate: substitute U+FFFD and leave the following
        // code unit to be processed by the next iteration
        codePoint = 0xfffd;
      }
    }

    if (codePoint < 0x80) {
      raw += String.fromCharCode(codePoint);
    }
    else if (codePoint < 0x800) {
      raw += String.fromCharCode(
        0xc0 | (codePoint >> 6),
        0x80 | (codePoint & 0x3f)
      );
    }
    else if (codePoint < 0x10000) {
      raw += String.fromCharCode(
        0xe0 | (codePoint >> 12),
        0x80 | ((codePoint >> 6) & 0x3f),
        0x80 | (codePoint & 0x3f)
      );
    }
    else {
      raw += String.fromCharCode(
        0xf0 | (codePoint >> 18),
        0x80 | ((codePoint >> 12) & 0x3f),
        0x80 | ((codePoint >> 6) & 0x3f),
        0x80 | (codePoint & 0x3f)
      );
    }
  }
  return raw;
}