diff --git a/.eslintrc.js b/.eslintrc.js
index caeeca403d20..e26a2ba95b38 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -70,6 +70,11 @@ module.exports = {
name: "matrix-react-sdk/",
message: "Please use matrix-react-sdk/src/index instead",
},
+ {
+ name: "emojibase-regex",
+ message:
+ "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
+ },
],
patterns: [
{
@@ -138,6 +143,11 @@ module.exports = {
],
message: "Please use matrix-js-sdk/src/matrix instead",
},
+ {
+ group: ["emojibase-regex/emoji*"],
+ message:
+ "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
+ },
],
},
],
diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx
index b63ed1dcf0ce..0ad3477d821f 100644
--- a/src/HtmlUtils.tsx
+++ b/src/HtmlUtils.tsx
@@ -20,7 +20,6 @@ limitations under the License.
import React, { LegacyRef, ReactNode } from "react";
import sanitizeHtml from "sanitize-html";
import classNames from "classnames";
-import EMOJIBASE_REGEX from "emojibase-regex";
import katex from "katex";
import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
@@ -46,10 +45,17 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;
// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
-// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace)
-const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g;
+// (Zero-Width Space, other whitespace)
+const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;
-const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, "i");
+// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional
+// emoji presentation VS (U+FE0F), but not those sequences that are followed by
+// a text presentation VS (U+FE0E). Technically this produces false negatives
+// for emoji followed by U+FE0E when the emoji doesn't have a text variant, but
+// in practice this doesn't matter.
+export const EMOJI_REGEX = /\p{RGI_Emoji}(?!\uFE0E)(?:(? {
expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`);
});
+ it("generates big emoji for emoji made of multiple characters", () => {
+ const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement);
+
+ expect(asFragment()).toMatchSnapshot();
+ });
+
it("should generate big emoji for an emoji-only reply to a message", () => {
const { asFragment } = render(
bodyToHtml(
@@ -132,6 +138,12 @@ describe("bodyToHtml", () => {
expect(asFragment()).toMatchSnapshot();
});
+ it("does not mistake characters in text presentation mode for emoji", () => {
+ const { asFragment } = render(bodyToHtml({ body: "↔", msgtype: "m.text" }, [], {}) as ReactElement);
+
+ expect(asFragment()).toMatchSnapshot();
+ });
+
describe("feature_latex_maths", () => {
beforeEach(() => {
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap
index c33cc46433d3..43bc6e00cfbf 100644
--- a/test/__snapshots__/HtmlUtils-test.tsx.snap
+++ b/test/__snapshots__/HtmlUtils-test.tsx.snap
@@ -1,5 +1,16 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
+exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = `
+
hello
$\\xi$
world
"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"hello
hel
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello world"`;
+exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = `
+