diff --git a/0001-bugfix-for-CVE-2025-55780.patch b/0001-bugfix-for-CVE-2025-55780.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6c113b263b7388df1eeed41f1e11c8b2ec57e37 --- /dev/null +++ b/0001-bugfix-for-CVE-2025-55780.patch @@ -0,0 +1,331 @@ +From 78e57e1f282f9cc388b409c006eef9673e10087a Mon Sep 17 00:00:00 2001 +Message-ID: <78e57e1f282f9cc388b409c006eef9673e10087a.1758702565.git.mjg@fedoraproject.org> +From: Robin Watts +Date: Fri, 5 Sep 2025 16:26:44 +0100 +Subject: [PATCH] Bug 708720: Fix NULL dereference in HTML layout. + +If we have a single flow node that is too large to fit in the +available width, and we are using the "overflow-wrap:break-word" +CSS style, then we attempt to break the flow node into smaller +pieces so that it can wrap nicely. + +We do this by walking the flow node text to break it into clusters; +we want to break at cluster level rather than character level to +avoid problems with shaping. + +For right to left text, the clusters come in the opposite order to +expected and the existing logic goes wrong. + +This can lead to the splitting code not actually splitting anything +which in turn can lead to node->next being NULL, and us attempting +to dereference NULL. + +The fix is to split differently for right 2 left text. + +While investigating this, an additional problem was spotted, namely +that the way we were calling harfbuzz meant that it didn't group +clusters together as we expected. Accordingly, we extend the code +here so that our 'string_walker' can call harfbuzz in 'grapheme' +cluster mode rather than 'character' cluster mode. + +Using that, we then update the code to walk the returned fragments +of the string, breaking the code at the correct position for the +required space, with care taken to cope correctly with both l2r and +r2l text. 
+--- + source/html/html-layout.c | 200 ++++++++++++++++++++++++++++++++------ + 1 file changed, 170 insertions(+), 30 deletions(-) + +diff --git a/source/html/html-layout.c b/source/html/html-layout.c +index bc7d7cdc6..6407eff0c 100644 +--- a/source/html/html-layout.c ++++ b/source/html/html-layout.c +@@ -33,6 +33,8 @@ + + #undef DEBUG_HARFBUZZ + ++#undef DEBUG_DESPERATE_SPLITTING ++ + /* + Some notes on the layout code below and the concepts used. + +@@ -130,6 +132,7 @@ typedef struct string_walker + hb_glyph_info_t *glyph_info; + unsigned int glyph_count; + int scale; ++ int graphemes; + } string_walker; + + static int quick_ligature_mov(fz_context *ctx, string_walker *walker, unsigned int i, unsigned int n, int unicode) +@@ -177,7 +180,7 @@ static int quick_ligature(fz_context *ctx, string_walker *walker, unsigned int i + return walker->glyph_info[i].codepoint; + } + +-static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, int language, int small_caps, const char *text) ++static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, int language, int small_caps, const char *text, int cluster_as_graphemes) + { + walker->ctx = ctx; + walker->hb_buf = hb_buf; +@@ -191,6 +194,7 @@ static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer + walker->font = NULL; + walker->next_font = NULL; + walker->small_caps = small_caps; ++ walker->graphemes = cluster_as_graphemes; + } + + static void +@@ -261,7 +265,10 @@ static int walk_string(string_walker *walker) + hb_buffer_set_language(walker->hb_buf, hb_language_from_string(lang, (int)strlen(lang))); + Memento_stopLeaking(); /* HarfBuzz leaks harmlessly */ + } +- hb_buffer_set_cluster_level(walker->hb_buf, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS); ++ if (walker->graphemes) ++ hb_buffer_set_cluster_level(walker->hb_buf, HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES); ++ 
else ++ hb_buffer_set_cluster_level(walker->hb_buf, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS); + + hb_buffer_add_utf8(walker->hb_buf, walker->start, walker->end - walker->start, 0, -1); + +@@ -338,7 +345,7 @@ static void measure_string_w(fz_context *ctx, fz_html_flow *node, hb_buffer_t *h + const char *s; + node->w = 0; + s = get_node_text(ctx, node); +- init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style->font, node->script, node->markup_lang, node->box->style->small_caps, s); ++ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style->font, node->script, node->markup_lang, node->box->style->small_caps, s, 0); + while (walk_string(&walker)) + { + int x = 0; +@@ -564,25 +571,70 @@ static int flush_line(fz_context *ctx, fz_html_box *box, layout_data *ld, float + return 0; + } + +-static void break_word_for_overflow_wrap(fz_context *ctx, fz_html_flow *node, layout_data *ld) ++static void split_flow_at_byte_offset(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset) ++{ ++ fz_html_flow *new_flow; ++ char *text; ++ size_t len; ++ ++ assert(flow->type == FLOW_WORD); ++ ++ assert(offset != 0); ++ text = flow->content.text + offset; ++ len = strlen(text); ++ new_flow = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + len+1); ++ memcpy(new_flow, flow, offsetof(fz_html_flow, content)); ++ new_flow->next = flow->next; ++ flow->next = new_flow; ++ strcpy(new_flow->content.text, text); ++ *text = 0; ++} ++ ++/* node becomes last cluster, node->next becomes the rest */ ++static void split_flow_at_byte_offset_reverse(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset) ++{ ++ fz_html_flow *new_flow; ++ char *text; ++ size_t len; ++ ++ assert(flow->type == FLOW_WORD); ++ ++ assert(offset != 0); ++ text = flow->content.text + offset; ++ len = strlen(text); ++ new_flow = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + offset+1); ++ memcpy(new_flow, flow, offsetof(fz_html_flow, content)); 
++ new_flow->next = flow->next; ++ flow->next = new_flow; ++ memcpy(new_flow->content.text, flow->content.text, offset); ++ new_flow->content.text[offset] = 0; ++ memmove(flow->content.text, text, len); ++ flow->content.text[len] = 0; ++} ++ ++static void break_word_for_overflow_wrap(fz_context *ctx, fz_html_flow *node, layout_data *ld, float max_w) + { + hb_buffer_t *hb_buf = ld->hb_buf; + const char *text = node->content.text; + string_walker walker; ++ float w = 0; ++ unsigned int at = (unsigned int)-1; ++ float em = node->box->s.layout.em; + + assert(node->type == FLOW_WORD); + assert(node->atomic == 0); + +- /* Split a word node after the first cluster (usually a character), and +- * flag the second half as a valid node to break before if in desperate +- * need. This may break earlier than necessary, but in that case we'll +- * break the second half again when we come to it, until we find a +- * suitable breaking point. ++ /* The entire flow doesn't fit on a line, so we need to break it in the middle. ++ * We need to be careful not to break it in the middle of a cluster, as this ++ * would really mess shaping up. ++ * ++ * For left 2 right text, this means split the first cluster into its own 'atomic' ++ * node, and shorten the remainder. + * +- * We split after each clusters here so we can flag each fragment as +- * "atomic" so we don't try breaking it again, and also to flag the +- * following word fragment as a possible break point. Breaking at the +- * exact desired point would make this more complicated than necessary. ++ * For right 2 left text, we split the last cluster into its own 'atomic' node ++ * and shorten the remainder. This is fine, cos although it appears to change the ++ * logical ordering for the text, we never extract from HTML, and the correct ++ * appearance is preserved. + * + * Desperately breaking in the middle of a word like this should should + * rarely (if ever) come up. 
+@@ -591,28 +643,116 @@ static void break_word_for_overflow_wrap(fz_context *ctx, fz_html_flow *node, la + */ + + /* Walk string and split at the first cluster. */ +- init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style->font, node->script, node->markup_lang, node->box->style->small_caps, text); +- while (walk_string(&walker)) ++ if ((node->bidi_level & 1) == 0) + { +- unsigned int i, a, b; +- a = walker.glyph_info[0].cluster; +- for (i = 0; i < walker.glyph_count; ++i) ++ /* Left 2 Right */ ++ init_string_walker(ctx, &walker, hb_buf, 0 /* L2R */, node->box->style->font, node->script, node->markup_lang, node->box->style->small_caps, text, 1); ++ while (walk_string(&walker)) + { +- b = walker.glyph_info[i].cluster; +- if (b != a) ++ unsigned int i; ++#ifdef DEBUG_DESPERATE_SPLITTING ++ for (i = 0; i < walker.glyph_count; ++i) + { +- fz_html_split_flow(ctx, ld->pool, node, fz_runeidx(text, text + b)); +- node->atomic = 1; +- node->next->overflow_wrap = 1; +- measure_string_w(ctx, node, ld->hb_buf); +- measure_string_w(ctx, node->next, ld->hb_buf); +- return; ++ uint32_t can_break_here = (hb_glyph_info_get_glyph_flags(&walker.glyph_info[i]) & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) == 0; ++ printf("%s(%x, %d)", ++ can_break_here ? "|" : " ", ++ walker.glyph_info[i].codepoint, walker.glyph_info[i].cluster); + } ++ printf("\n"); ++#endif ++ for (i = 0; i < walker.glyph_count; ++i) ++ { ++ uint32_t can_break_here = (hb_glyph_info_get_glyph_flags(&walker.glyph_info[i]) & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) == 0; ++ ++ if (can_break_here) ++ { ++ /* If this fragment would take us beyond the end, then give up. 
*/ ++ if (w > max_w) ++ break; ++ ++ at = walker.start + walker.glyph_info[i].cluster - text; ++ } ++ ++ w += walker.glyph_pos[i].x_advance * em / walker.scale; ++ ++ /* Make sure we have the whole cluster */ ++ while (i+1 < walker.glyph_count && walker.glyph_info[i].cluster == walker.glyph_info[i+1].cluster) ++ { ++ i++; ++ w += walker.glyph_pos[i].x_advance * em / walker.scale; ++ } ++ } ++ } ++ if (at != (unsigned int)-1 && at != 0 && at != strlen(text)) ++ { ++ /* node becomes first cluster, node->next becomes the rest */ ++ split_flow_at_byte_offset(ctx, ld->pool, node, at); ++ node->next->overflow_wrap = 1; ++ measure_string_w(ctx, node, ld->hb_buf); ++ measure_string_w(ctx, node->next, ld->hb_buf); ++ return; ++ } ++ } ++ else ++ { ++ /* Right 2 Left */ ++ init_string_walker(ctx, &walker, hb_buf, 1 /* R2L */, node->box->style->font, node->script, node->markup_lang, node->box->style->small_caps, text, 1); ++ while (walk_string(&walker)) ++ { ++ unsigned int i; ++#ifdef DEBUG_DESPERATE_SPLITTING ++ for (i = 0; i < walker.glyph_count; ++i) ++ { ++ uint32_t can_break_here = (hb_glyph_info_get_glyph_flags(&walker.glyph_info[i]) & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) == 0; ++ printf("%s(%x, %d)", ++ can_break_here ? "|" : " ", ++ walker.glyph_info[i].codepoint, walker.glyph_info[i].cluster); ++ } ++ printf("\n"); ++#endif ++ /* Find the first cluster we can break before. */ ++ ++ /* We can always break at the start of a fragment returned by walk_string ++ * (unless it's the very first one!) */ ++ if (w > max_w) ++ break; ++ if (walker.start != text) ++ at = walker.start - text; ++ ++ for(i = 0; i < walker.glyph_count; i++) ++ { ++ uint32_t can_break_here = (hb_glyph_info_get_glyph_flags(&walker.glyph_info[i]) & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) == 0; ++ ++ w += walker.glyph_pos[i].x_advance * em / walker.scale; ++ ++ if (can_break_here) ++ { ++ /* If this fragment would take us beyond the end, then give up. 
*/ ++ if (w > max_w) ++ break; ++ ++ if (i == walker.glyph_count) ++ at = walker.start - text; ++ else ++ at = walker.start + walker.glyph_info[i].cluster - text; ++ } ++ } ++ } ++ if (at != (unsigned int)-1 && at != 0 && at != strlen(text)) ++ { ++ /* Split at the last point found */ ++ /* node becomes last cluster, node->next becomes the rest */ ++ split_flow_at_byte_offset_reverse(ctx, ld->pool, node, at); ++ node->next->overflow_wrap = 1; ++ measure_string_w(ctx, node, ld->hb_buf); ++ measure_string_w(ctx, node->next, ld->hb_buf); ++ return; + } + } + +- /* Word is already only one cluster. Don't try breaking here again! */ +- node->atomic = 1; ++ /* Unless we've overflowed word is already only one cluster. Don't try breaking here again! */ ++ if (w <= max_w) ++ node->atomic = 1; + } + + /* +@@ -787,7 +927,7 @@ static void layout_flow(fz_context *ctx, layout_data *ld, fz_html_box *box, fz_h + { + if (!node->atomic && node->box->style->overflow_wrap == OVERFLOW_WRAP_BREAK_WORD) + { +- break_word_for_overflow_wrap(ctx, node, ld); ++ break_word_for_overflow_wrap(ctx, node, ld, box->s.layout.w - line_w); + } + } + /* Remember overflow-wrap word fragments, unless at the beginning of a line. 
*/ +@@ -2003,7 +2143,7 @@ static int draw_flow_box(fz_context *ctx, fz_html_box *box, float page_top, floa + trm.f = y - page_top; + + s = get_node_text(ctx, node); +- init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, node->markup_lang, style->small_caps, s); ++ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, node->markup_lang, style->small_caps, s, 0); + while (walk_string(&walker)) + { + float node_scale = node->box->s.layout.em / walker.scale; +-- +2.51.0.544.g305ed5581e + diff --git a/mupdf.spec b/mupdf.spec index 56f65172802d657861f22cbc8235a57a4ea0d977..b844ee7602d10c44d7043f3843054652af19085a 100644 --- a/mupdf.spec +++ b/mupdf.spec @@ -1,4 +1,4 @@ -%define anolis_release 2 +%define anolis_release 3 # Desired jbig2dec header files and library version # Apparantly, jbig2dec complains even about newer versions. @@ -16,6 +16,10 @@ Source0: http://mupdf.com/downloads/archive/%{name}-%{upversion}-source.t Source1: %{name}.desktop Source2: %{name}-gl.desktop +# https://cgit.ghostscript.com/cgi-bin/cgit.cgi/mupdf.git/commit/?id=bdd5d241748807378a78a622388e0312332513c5 +# https://src.fedoraproject.org/rpms/mupdf/c/8c9d86df2c9ddcad73e37a02b30266783597a006?branch=rawhide +Patch1: 0001-bugfix-for-CVE-2025-55780.patch + BuildRequires: gcc gcc-c++ make binutils desktop-file-utils coreutils pkgconfig BuildRequires: openjpeg2-devel desktop-file-utils BuildRequires: libjpeg-devel freetype-devel libXext-devel curl-devel @@ -131,6 +135,9 @@ cd %{buildroot}/%{_bindir} && ln -s %{name}-x11 %{name} %doc README CHANGES docs/* %changelog +* Mon Nov 17 2025 YangCheng - 1.25.0-3 +- Add patch to fix CVE-2025-55780 + * Thu Mar 27 2025 Hong Wei Qin - 1.25.0-2 - Rebuild with openjpeg2-devel