diff --git a/backport-Add-missing-encode-for-custom-term.patch b/backport-Add-missing-encode-for-custom-term.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e91b2ef8c630f62d58bc9357667f65506e2007f --- /dev/null +++ b/backport-Add-missing-encode-for-custom-term.patch @@ -0,0 +1,32 @@ +From 4444a04ece4c02a7bd51e8c75623f22dc12d882b Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Sun, 2 Jun 2024 16:59:16 +0900 +Subject: [PATCH] Add missing encode for custom term + +--- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 4483aec..999f467 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -163,6 +163,7 @@ module REXML + end + + def read(term = nil) ++ term = encode(term) if term + begin + @scanner << readline(term) + true +@@ -174,6 +175,7 @@ module REXML + + def read_until(term) + pattern = /#{Regexp.escape(term)}/ ++ term = encode(term) + begin + until str = @scanner.scan_until(pattern) + @scanner << readline(term) +-- +2.43.0 + diff --git a/backport-CVE-2024-35176.patch b/backport-CVE-2024-35176.patch new file mode 100644 index 0000000000000000000000000000000000000000..c943a725559214b248a879aede43915092473983 --- /dev/null +++ b/backport-CVE-2024-35176.patch @@ -0,0 +1,107 @@ +From 4325835f92f3f142ebd91a3fdba4e1f1ab7f1cfb Mon Sep 17 00:00:00 2001 +From: Nobuyoshi Nakada +Date: Thu, 16 May 2024 11:26:51 +0900 +Subject: [PATCH] Read quoted attributes in chunks (#126) + +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 20 ++++++++++---------- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 29 ++++++++++++++++++++++++----- + 4 files changed, 46 insertions(+), 15 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 
8d62391..d09237c 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -628,17 +628,17 @@ module REXML + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- unless match = @source.match(/(['"])(.*?)\1\s*/um, true) +- if match = @source.match(/(['"])/, true) +- message = +- "Missing attribute value end quote: <#{name}>: <#{match[1]}>" +- raise REXML::ParseException.new(message, @source) +- else +- message = "Missing attribute value start quote: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end ++ unless match = @source.match(/(['"])/, true) ++ message = "Missing attribute value start quote: <#{name}>" ++ raise REXML::ParseException.new(message, @source) ++ end ++ quote = match[1] ++ value = @source.read_until(quote) ++ unless value.chomp!(quote) ++ message = "Missing attribute value end quote: <#{name}>: <#{quote}>" ++ raise REXML::ParseException.new(message, @source) + end +- value = match[2] ++ @source.match(/\s*/um, true) + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 7f47c2b..999751b 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -65,7 +65,11 @@ module REXML + encoding_updated + end + +- def read ++ def read(term = nil) ++ end ++ ++ def read_until(term) ++ @scanner.scan_until(Regexp.union(term)) or @scanner.rest + end + + def ensure_buffer +@@ -158,9 +162,9 @@ module REXML + end + end + +- def read ++ def read(term = nil) + begin +- @scanner << readline ++ @scanner << readline(term) + true + rescue Exception, NameError + @source = nil +@@ -168,6 +172,21 @@ module REXML + end + end + ++ def read_until(term) ++ pattern = Regexp.union(term) ++ data = [] ++ begin ++ until str 
= @scanner.scan_until(pattern) ++ @scanner << readline(term) ++ end ++ rescue EOFError ++ @scanner.rest ++ else ++ read if @scanner.eos? and !@source.eof? ++ str ++ end ++ end ++ + def ensure_buffer + read if @scanner.eos? && @source + end +@@ -218,8 +237,8 @@ module REXML + end + + private +- def readline +- str = @source.readline(@line_break) ++ def readline(term = nil) ++ str = @source.readline(term || @line_break) + if @pending_buffer + if str.nil? + str = @pending_buffer +-- +2.43.0 + diff --git a/backport-Change-attribute.has_key-name-to-attributes-name-.-1.patch b/backport-Change-attribute.has_key-name-to-attributes-name-.-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7623b7a037acb6698842dc4685031e9351d8cb5 --- /dev/null +++ b/backport-Change-attribute.has_key-name-to-attributes-name-.-1.patch @@ -0,0 +1,118 @@ +From 030bfb4cf91f218a481de5c661c7a689f48971d5 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Fri, 22 Mar 2024 22:28:00 +0900 +Subject: [PATCH] Change `attribute.has_key?(name)` to ` attributes[name]`. + (#121) + +## Why? +`attributes[name]` is faster than `attribute.has_key?(name)` in Micro +Benchmark. + +However, the Benchmark did not show a significant difference. +Would like to merge if possible, how about it? 
+ +See: https://github.com/ruby/rexml/pull/119#discussion_r1525611640 + +## Micro Benchmark + +``` +$ cat benchmark/attributes.yaml +loop_count: 100000 +contexts: + - name: No YJIT + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + - name: YJIT + prelude: | + $LOAD_PATH.unshift(File.expand_path("lib")) + require 'rexml' + RubyVM::YJIT.enable + +prelude: | + attributes = {} + name = :a + +benchmark: + 'attributes[name]' : attributes[name] + 'attributes.has_key?(name)' : attributes.has_key?(name) +``` + +``` +$ benchmark-driver benchmark/attributes.yaml +Calculating ------------------------------------- + No YJIT YJIT + attributes[name] 53.362M 53.562M i/s - 100.000k times in 0.001874s 0.001867s +attributes.has_key?(name) 45.025M 45.005M i/s - 100.000k times in 0.002221s 0.002222s + +Comparison: + attributes[name] + YJIT: 53561863.6 i/s + No YJIT: 53361791.1 i/s - 1.00x slower + + attributes.has_key?(name) + No YJIT: 45024765.3 i/s + YJIT: 45004502.0 i/s - 1.00x slower +``` + +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.786 10.783 18.196 17.959 i/s - 100.000 times in 9.270908s 9.273657s 5.495854s 5.568326s + sax 30.213 30.430 57.030 56.672 i/s - 100.000 times in 3.309845s 3.286240s 1.753459s 1.764551s + pull 35.211 35.259 70.817 70.784 i/s - 100.000 times in 2.840056s 2.836136s 1.412098s 1.412754s + stream 34.281 34.475 63.084 62.978 i/s - 100.000 times in 2.917067s 2.900689s 1.585196s 1.587860s + +Comparison: + dom + before(YJIT): 18.2 i/s + after(YJIT): 18.0 i/s - 1.01x slower + before: 10.8 i/s - 1.69x slower + after: 10.8 i/s - 1.69x slower + + sax + before(YJIT): 57.0 i/s + after(YJIT): 56.7 i/s - 1.01x slower + after: 30.4 i/s - 1.87x 
slower + before: 30.2 i/s - 1.89x slower + + pull + before(YJIT): 70.8 i/s + after(YJIT): 70.8 i/s - 1.00x slower + after: 35.3 i/s - 2.01x slower + before: 35.2 i/s - 2.01x slower + + stream + before(YJIT): 63.1 i/s + after(YJIT): 63.0 i/s - 1.00x slower + after: 34.5 i/s - 1.83x slower + before: 34.3 i/s - 1.84x slower + +``` + +- YJIT=ON : 0.98x - 1.00x faster +- YJIT=OFF : 1.00x - 1.00x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index f66b968..8d62391 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -656,7 +656,7 @@ module REXML + prefixes << prefix unless prefix == "xml" + end + +- if attributes.has_key?(name) ++ if attributes[name] + msg = "Duplicate attribute #{name.inspect}" + raise REXML::ParseException.new(msg, @source, self) + end +-- +2.43.0 + diff --git a/backport-Change-loop-in-parse_attributes-to-while-true-.-109.patch.patch b/backport-Change-loop-in-parse_attributes-to-while-true-.-109.patch.patch new file mode 100644 index 0000000000000000000000000000000000000000..35949dac4c36935c5f6fa931efacde9ea2330b93 --- /dev/null +++ b/backport-Change-loop-in-parse_attributes-to-while-true-.-109.patch.patch @@ -0,0 +1,70 @@ +From 7e4049f6a68c99c4efec2df117057ee080680c9f Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Wed, 31 Jan 2024 17:17:51 +0900 +Subject: [PATCH] Change loop in parse_attributes to `while true`. (#109) + +## Why + +loop is slower than `while true`. 
+ +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 11.186 11.304 17.395 17.450 i/s - 100.000 times in 8.940144s 8.846590s 5.748718s 5.730793s + sax 30.811 31.629 47.352 48.040 i/s - 100.000 times in 3.245601s 3.161619s 2.111854s 2.081594s + pull 35.793 36.621 56.924 57.313 i/s - 100.000 times in 2.793829s 2.730693s 1.756732s 1.744812s + stream 33.157 34.757 46.792 50.536 i/s - 100.000 times in 3.015940s 2.877088s 2.137106s 1.978787s + +Comparison: + dom + after(YJIT): 17.4 i/s + before(YJIT): 17.4 i/s - 1.00x slower + after: 11.3 i/s - 1.54x slower + before: 11.2 i/s - 1.56x slower + + sax + after(YJIT): 48.0 i/s + before(YJIT): 47.4 i/s - 1.01x slower + after: 31.6 i/s - 1.52x slower + before: 30.8 i/s - 1.56x slower + + pull + after(YJIT): 57.3 i/s + before(YJIT): 56.9 i/s - 1.01x slower + after: 36.6 i/s - 1.57x slower + before: 35.8 i/s - 1.60x slower + + stream + after(YJIT): 50.5 i/s + before(YJIT): 46.8 i/s - 1.08x slower + after: 34.8 i/s - 1.45x slower + before: 33.2 i/s - 1.52x slower + +``` + +- YJIT=ON : 1.00x - 1.08x faster +- YJIT=OFF : 1.01x - 1.04x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index b66b0ed..3fe5c29 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -610,7 +610,7 @@ module REXML + end + + pos = scanner.pos +- loop do ++ while true + break if scanner.scan(ATTRIBUTE_PATTERN) + unless scanner.scan(QNAME) + message = "Invalid attribute name: 
<#{scanner.rest}>" +-- +2.43.0 + diff --git a/backport-Don-t-include-private_constant-ed-module-155.patch b/backport-Don-t-include-private_constant-ed-module-155.patch new file mode 100644 index 0000000000000000000000000000000000000000..bb6593408d932326e27c2a5e6c0e3956ac1b9829 --- /dev/null +++ b/backport-Don-t-include-private_constant-ed-module-155.patch @@ -0,0 +1,96 @@ +From cfa8dd90077000f21f55a6b7e5f041e2b4fd5e04 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sat, 22 Jun 2024 14:21:28 +0900 +Subject: [PATCH] Don't include private_constant-ed module (#155) + +Included constants are not private. So private constants in private +module aren't private. + +See also: https://github.com/ruby/rexml/pull/154#discussion_r1649469269 +--- + .../rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 13 ++++++------- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 1 - + 2 files changed, 6 insertions(+), 8 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index f58a16b..b00ddf2 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -123,7 +123,6 @@ module REXML + ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um + end + private_constant :Private +- include Private + + def initialize( source ) + self.stream = source +@@ -284,7 +283,7 @@ module REXML + raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? + return [ :elementdecl, " +Date: Sat, 22 Jun 2024 10:42:44 +0900 +Subject: [PATCH] Fix a bug that a large XML can't be parsed (#154) + +GitHub: fix GH-150 + +If a parsed XML is larger than `2 ** 31 - 1`, we can't parse it. Because +`StringScanner`s position is stored as `int`. We can avoid the +restriction by dropping large parsed content. 
+ +Co-authored-by: Sutou Kouhei +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 2 ++ + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 7 +++++++ + 2 files changed, 9 insertions(+) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 4f6b14e..f58a16b 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -193,6 +193,8 @@ module REXML + + # Returns the next event. This is a +PullEvent+ object. + def pull ++ @source.drop_parsed_content ++ + pull_event.tap do |event| + @listeners.each do |listener| + listener.receive event +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 982aa84..bce8160 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -35,6 +35,7 @@ module REXML + attr_reader :encoding + + module Private ++ SCANNER_RESET_SIZE = 100000 + PRE_DEFINED_TERM_PATTERNS = {} + pre_defined_terms = ["'", '"', "<"] + pre_defined_terms.each do |term| +@@ -64,6 +65,12 @@ module REXML + @scanner.rest + end + ++ def drop_parsed_content ++ if @scanner.pos > Private::SCANNER_RESET_SIZE ++ @scanner.string = @scanner.rest ++ end ++ end ++ + def buffer_encoding=(encoding) + @scanner.string.force_encoding(encoding) + end +-- +2.33.0 + + diff --git a/backport-Improve-text-parse-performance.patch b/backport-Improve-text-parse-performance.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8e5ed64934f3b8e630f65c92a04920c3cd80800 --- /dev/null +++ b/backport-Improve-text-parse-performance.patch @@ -0,0 +1,100 @@ +From e06b3fb2660c682423e10d59b92d192c42e9825d Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Fri, 7 Jun 2024 14:34:25 +0900 +Subject: [PATCH] Improve text parse performance + +If there are many ">"s in text, parsing is very slow. 
+ + Calculating ------------------------------------- + rexml 3.2.6 master 3.2.6(YJIT) master(YJIT) + attribute 1.116 3.618k 1.117 1.941k i/s - 10.000 times in 8.957748s 0.002764s 8.951665s 0.005152s + text 27.089 2.262k 42.632 1.033k i/s - 10.000 times in 0.369147s 0.004421s 0.234566s 0.009683s + + Comparison: + attribute + master: 3617.6 i/s + master(YJIT): 1941.1 i/s - 1.86x slower + 3.2.6(YJIT): 1.1 i/s - 3238.31x slower + rexml 3.2.6: 1.1 i/s - 3240.51x slower + + text + master: 2261.8 i/s + master(YJIT): 1032.7 i/s - 2.19x slower + 3.2.6(YJIT): 42.6 i/s - 53.05x slower + rexml 3.2.6: 27.1 i/s - 83.49x slower +--- + .../lib/rexml/parsers/baseparser.rb | 10 ++++++++-- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 19 ++++++++++--------- + 2 files changed, 18 insertions(+), 11 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 9d20044..18fec5e 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -362,6 +362,10 @@ module REXML + begin + start_position = @source.position + if @source.match("<", true) ++ # :text's read_until may remain only "<" in buffer. In the ++ # case, buffer is empty here. So we need to fill buffer ++ # here explicitly. 
++ @source.ensure_buffer + if @source.match("/", true) + @nsstack.shift + last_tag = @tags.pop +@@ -427,8 +431,10 @@ module REXML + return [ :start_element, tag, attributes ] + end + else +- md = @source.match(/([^<]*)/um, true) +- text = md[1] ++ text = @source.read_until("<") ++ if text.chomp!("<") ++ @source.position -= "<".bytesize ++ end + return [ :text, text ] + end + rescue REXML::UndefinedNamespaceException +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 542b76a..982aa84 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -36,7 +36,7 @@ module REXML + + module Private + PRE_DEFINED_TERM_PATTERNS = {} +- pre_defined_terms = ["'", '"'] ++ pre_defined_terms = ["'", '"', "<"] + pre_defined_terms.each do |term| + PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ + end +@@ -192,17 +192,18 @@ module REXML + def read_until(term) + pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + term = encode(term) +- begin +- until str = @scanner.scan_until(pattern) +- @scanner << readline(term) +- end +- rescue EOFError ++ until str = @scanner.scan_until(pattern) ++ break if @source.nil? ++ break if @source.eof? ++ @scanner << readline(term) ++ end ++ if str ++ read if @scanner.eos? and !@source.eof? ++ str ++ else + rest = @scanner.rest + @scanner.pos = @scanner.string.bytesize + rest +- else +- read if @scanner.eos? and !@source.eof? 
+- str + end + end + +-- +2.43.0 + diff --git a/backport-Optimize-Source-read_until-method-135.patch b/backport-Optimize-Source-read_until-method-135.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ecb938331b30454f00a0eab990e7f8298197940 --- /dev/null +++ b/backport-Optimize-Source-read_until-method-135.patch @@ -0,0 +1,96 @@ +From 037c16a5768d25d69570ccce73b2eb78b559a9b4 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Mon, 3 Jun 2024 10:24:24 +0900 +Subject: [PATCH] Optimize Source#read_until method (#135) + +Optimize `Source#read_until` method. + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 9.877 9.992 15.605 17.559 i/s - 100.000 times in 10.124592s 10.008017s 6.408031s 5.695167s + sax 22.903 25.151 39.482 50.846 i/s - 100.000 times in 4.366300s 3.975922s 2.532822s 1.966706s + pull 25.940 30.474 44.685 61.450 i/s - 100.000 times in 3.855070s 3.281511s 2.237879s 1.627346s + stream 25.255 29.500 41.819 53.605 i/s - 100.000 times in 3.959539s 3.389825s 2.391256s 1.865505s + +Comparison: + dom + after(YJIT): 17.6 i/s + before(YJIT): 15.6 i/s - 1.13x slower + after: 10.0 i/s - 1.76x slower + before: 9.9 i/s - 1.78x slower + + sax + after(YJIT): 50.8 i/s + before(YJIT): 39.5 i/s - 1.29x slower + after: 25.2 i/s - 2.02x slower + before: 22.9 i/s - 2.22x slower + + pull + after(YJIT): 61.4 i/s + before(YJIT): 44.7 i/s - 1.38x slower + after: 30.5 i/s - 2.02x slower + before: 25.9 i/s - 2.37x slower + + stream + after(YJIT): 53.6 i/s + before(YJIT): 41.8 i/s - 1.28x slower + after: 29.5 i/s - 1.82x slower + before: 25.3 i/s - 2.12x slower + +``` + +- YJIT=ON : 1.13x - 1.38x faster +- YJIT=OFF : 1.01x - 1.17x faster + +Co-authored-by: Sutou Kouhei +--- 
+ .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 3be3f84..542b76a 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -34,6 +34,16 @@ module REXML + attr_reader :line + attr_reader :encoding + ++ module Private ++ PRE_DEFINED_TERM_PATTERNS = {} ++ pre_defined_terms = ["'", '"'] ++ pre_defined_terms.each do |term| ++ PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ ++ end ++ end ++ private_constant :Private ++ include Private ++ + # Constructor + # @param arg must be a String, and should be a valid XML document + # @param encoding if non-null, sets the encoding of the source to this +@@ -69,7 +79,8 @@ module REXML + end + + def read_until(term) +- data = @scanner.scan_until(/#{Regexp.escape(term)}/) ++ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ ++ data = @scanner.scan_until(pattern) + unless data + data = @scanner.rest + @scanner.pos = @scanner.string.bytesize +@@ -179,7 +190,7 @@ module REXML + end + + def read_until(term) +- pattern = /#{Regexp.escape(term)}/ ++ pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/ + term = encode(term) + begin + until str = @scanner.scan_until(pattern) +-- +2.43.0 + diff --git a/backport-Optimize-the-parse_attributes-method-to-use-Source-m.patch b/backport-Optimize-the-parse_attributes-method-to-use-Source-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..c43a6b6776d628810dd29633bc82c966d79054f4 --- /dev/null +++ b/backport-Optimize-the-parse_attributes-method-to-use-Source-m.patch @@ -0,0 +1,209 @@ +From 0496940d5998ccbc50d16fb734993ab50fc60c2d Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Mon, 18 Mar 2024 23:30:47 +0900 +Subject: [PATCH] Optimize the parse_attributes method to use 
`Source#match` + to parse XML. (#119) + +## Why? + +Improve maintainability by consolidating processing into `Source#match`. + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.891 10.622 16.356 17.403 i/s - 100.000 times in 9.182130s 9.414177s 6.113806s 5.746133s + sax 30.335 29.845 49.749 54.877 i/s - 100.000 times in 3.296483s 3.350595s 2.010071s 1.822259s + pull 35.514 34.801 61.123 66.908 i/s - 100.000 times in 2.815793s 2.873484s 1.636041s 1.494591s + stream 35.141 34.475 52.110 56.836 i/s - 100.000 times in 2.845646s 2.900638s 1.919017s 1.759456s + +Comparison: + dom + after(YJIT): 17.4 i/s + before(YJIT): 16.4 i/s - 1.06x slower + before: 10.9 i/s - 1.60x slower + after: 10.6 i/s - 1.64x slower + + sax + after(YJIT): 54.9 i/s + before(YJIT): 49.7 i/s - 1.10x slower + before: 30.3 i/s - 1.81x slower + after: 29.8 i/s - 1.84x slower + + pull + after(YJIT): 66.9 i/s + before(YJIT): 61.1 i/s - 1.09x slower + before: 35.5 i/s - 1.88x slower + after: 34.8 i/s - 1.92x slower + + stream + after(YJIT): 56.8 i/s + before(YJIT): 52.1 i/s - 1.09x slower + before: 35.1 i/s - 1.62x slower + after: 34.5 i/s - 1.65x slower + +``` + +- YJIT=ON : 1.06x - 1.10x faster +- YJIT=OFF : 0.97x - 0.98x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 116 ++++++++++++-------------------- + 1 file changed, 44 insertions(+), 72 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index c01b087..f66b968 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -114,7 +114,7 @@ module REXML + + module 
Private + INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um +- TAG_PATTERN = /((?>#{QNAME_STR}))/um ++ TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um + ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NAME_PATTERN = /\s*#{NAME}/um +@@ -128,7 +128,6 @@ module REXML + def initialize( source ) + self.stream = source + @listeners = [] +- @attributes_scanner = StringScanner.new('') + end + + def add_listener( listener ) +@@ -614,87 +613,60 @@ module REXML + def parse_attributes(prefixes, curr_ns) + attributes = {} + closed = false +- match_data = @source.match(/^(.*?)(\/)?>/um, true) +- if match_data.nil? +- message = "Start tag isn't ended" +- raise REXML::ParseException.new(message, @source) +- end +- +- raw_attributes = match_data[1] +- closed = !match_data[2].nil? +- return attributes, closed if raw_attributes.nil? +- return attributes, closed if raw_attributes.empty? +- +- @attributes_scanner.string = raw_attributes +- scanner = @attributes_scanner +- until scanner.eos? +- if scanner.scan(/\s+/) +- break if scanner.eos? 
+- end +- +- start_position = scanner.pos +- while true +- break if scanner.scan(ATTRIBUTE_PATTERN) +- unless scanner.scan(QNAME) +- message = "Invalid attribute name: <#{scanner.rest}>" +- raise REXML::ParseException.new(message, @source) +- end +- name = scanner[0] +- unless scanner.scan(/\s*=\s*/um) ++ while true ++ if @source.match(">", true) ++ return attributes, closed ++ elsif @source.match("/>", true) ++ closed = true ++ return attributes, closed ++ elsif match = @source.match(QNAME, true) ++ name = match[1] ++ prefix = match[2] ++ local_part = match[3] ++ ++ unless @source.match(/\s*=\s*/um, true) + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- quote = scanner.scan(/['"]/) +- unless quote +- message = "Missing attribute value start quote: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end +- unless scanner.scan(/.*#{Regexp.escape(quote)}/um) +- @source.ensure_buffer +- match_data = @source.match(/^(.*?)(\/)?>/um, true) +- if match_data +- scanner << "/" if closed +- scanner << ">" +- scanner << match_data[1] +- scanner.pos = start_position +- closed = !match_data[2].nil? 
+- next ++ unless match = @source.match(/(['"])(.*?)\1\s*/um, true) ++ if match = @source.match(/(['"])/, true) ++ message = ++ "Missing attribute value end quote: <#{name}>: <#{match[1]}>" ++ raise REXML::ParseException.new(message, @source) ++ else ++ message = "Missing attribute value start quote: <#{name}>" ++ raise REXML::ParseException.new(message, @source) + end +- message = +- "Missing attribute value end quote: <#{name}>: <#{quote}>" +- raise REXML::ParseException.new(message, @source) + end +- end +- name = scanner[1] +- prefix = scanner[2] +- local_part = scanner[3] +- # quote = scanner[4] +- value = scanner[5] +- if prefix == "xmlns" +- if local_part == "xml" +- if value != "http://www.w3.org/XML/1998/namespace" +- msg = "The 'xml' prefix must not be bound to any other namespace "+ ++ value = match[2] ++ if prefix == "xmlns" ++ if local_part == "xml" ++ if value != "http://www.w3.org/XML/1998/namespace" ++ msg = "The 'xml' prefix must not be bound to any other namespace "+ ++ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" ++ raise REXML::ParseException.new( msg, @source, self ) ++ end ++ elsif local_part == "xmlns" ++ msg = "The 'xmlns' prefix must not be declared "+ + "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" +- raise REXML::ParseException.new( msg, @source, self ) ++ raise REXML::ParseException.new( msg, @source, self) + end +- elsif local_part == "xmlns" +- msg = "The 'xmlns' prefix must not be declared "+ +- "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" +- raise REXML::ParseException.new( msg, @source, self) ++ curr_ns << local_part ++ elsif prefix ++ prefixes << prefix unless prefix == "xml" + end +- curr_ns << local_part +- elsif prefix +- prefixes << prefix unless prefix == "xml" +- end + +- if attributes.has_key?(name) +- msg = "Duplicate attribute #{name.inspect}" +- raise REXML::ParseException.new(msg, @source, self) +- end ++ if attributes.has_key?(name) ++ msg = "Duplicate attribute #{name.inspect}" ++ raise 
REXML::ParseException.new(msg, @source, self) ++ end + +- attributes[name] = value ++ attributes[name] = value ++ else ++ message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" ++ raise REXML::ParseException.new(message, @source) ++ end + end +- return attributes, closed + end + end + end +-- +2.43.0 + diff --git a/backport-Reduce-calls-to-Source-buffer-StringScanner-rest-106.patch b/backport-Reduce-calls-to-Source-buffer-StringScanner-rest-106.patch new file mode 100644 index 0000000000000000000000000000000000000000..2f7810040e817ce8825efa1d5754d37f7a0d5234 --- /dev/null +++ b/backport-Reduce-calls-to-Source-buffer-StringScanner-rest-106.patch @@ -0,0 +1,99 @@ +From 83ca5c4b0f76cf7b307dd1be1dc934e1e8199863 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sun, 21 Jan 2024 06:11:42 +0900 +Subject: [PATCH] Reduce calls to `Source#buffer`(`StringScanner#rest`) (#106) + +Reduce calls to `Source#buffer`(`StringScanner#rest`) + +## Why +`Source#buffer` calling `StringScanner#rest`. +`StringScanner#rest` is slow. +Reduce calls to `Source#buffer`. 
+ +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.639 10.985 16.213 16.221 i/s - 100.000 times in 9.399033s 9.103461s 6.167962s 6.164794s + sax 28.357 29.440 42.900 44.375 i/s - 100.000 times in 3.526479s 3.396688s 2.331024s 2.253511s + pull 32.852 34.210 48.976 51.273 i/s - 100.000 times in 3.043965s 2.923140s 2.041816s 1.950344s + stream 30.821 31.908 43.953 44.697 i/s - 100.000 times in 3.244539s 3.134020s 2.275172s 2.237310s + +Comparison: + dom + after(YJIT): 16.2 i/s + before(YJIT): 16.2 i/s - 1.00x slower + after: 11.0 i/s - 1.48x slower + before: 10.6 i/s - 1.52x slower + + sax + after(YJIT): 44.4 i/s + before(YJIT): 42.9 i/s - 1.03x slower + after: 29.4 i/s - 1.51x slower + before: 28.4 i/s - 1.56x slower + + pull + after(YJIT): 51.3 i/s + before(YJIT): 49.0 i/s - 1.05x slower + after: 34.2 i/s - 1.50x slower + before: 32.9 i/s - 1.56x slower + + stream + after(YJIT): 44.7 i/s + before(YJIT): 44.0 i/s - 1.02x slower + after: 31.9 i/s - 1.40x slower + before: 30.8 i/s - 1.45x slower + +``` + +- YJIT=ON : 1.00x - 1.05x faster +- YJIT=OFF : 1.03x - 1.04x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 65bad26..7126a12 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -348,9 +348,13 @@ module REXML + @source.match(/\A\s*/um, true) + end + begin +- @source.read if @source.buffer.size<2 +- if @source.buffer[0] == ?< +- if @source.buffer[1] == ?/ ++ 
next_data = @source.buffer ++ if next_data.size < 2 ++ @source.read ++ next_data = @source.buffer ++ end ++ if next_data[0] == ?< ++ if next_data[1] == ?/ + @nsstack.shift + last_tag = @tags.pop + md = @source.match( CLOSE_MATCH, true ) +@@ -364,7 +368,7 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + return [ :end_element, last_tag ] +- elsif @source.buffer[1] == ?! ++ elsif next_data[1] == ?! + md = @source.match(/\A(\s*[^>]*>)/um) + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + raise REXML::ParseException.new("Malformed node", @source) unless md +@@ -383,7 +387,7 @@ module REXML + end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) +- elsif @source.buffer[1] == ?? ++ elsif next_data[1] == ?? + return process_instruction + else + # Get the next tag +-- +2.43.0 + diff --git a/backport-Reduce-calls-to-StringScanner.new-108.patch b/backport-Reduce-calls-to-StringScanner.new-108.patch new file mode 100644 index 0000000000000000000000000000000000000000..4717fb1a6a963da2f3fa2266ba17ff5507b5f489 --- /dev/null +++ b/backport-Reduce-calls-to-StringScanner.new-108.patch @@ -0,0 +1,80 @@ +From 51217dbcc64ecc34aa70f126b103bedf07e153fc Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Wed, 31 Jan 2024 16:35:55 +0900 +Subject: [PATCH] Reduce calls to StringScanner.new() (#108) + +## Why + +`StringScanner.new()` instances can be reused within parse_attributes, +reducing initialization costs. 
+ +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 11.018 11.207 17.059 16.660 i/s - 100.000 times in 9.075992s 8.923280s 5.861969s 6.002555s + sax 29.843 30.821 45.518 47.505 i/s - 100.000 times in 3.350909s 3.244524s 2.196940s 2.105037s + pull 34.480 35.937 52.816 57.098 i/s - 100.000 times in 2.900205s 2.782632s 1.893370s 1.751378s + stream 32.430 33.516 46.247 48.412 i/s - 100.000 times in 3.083536s 2.983607s 2.162288s 2.065584s + +Comparison: + dom + before(YJIT): 17.1 i/s + after(YJIT): 16.7 i/s - 1.02x slower + after: 11.2 i/s - 1.52x slower + before: 11.0 i/s - 1.55x slower + + sax + after(YJIT): 47.5 i/s + before(YJIT): 45.5 i/s - 1.04x slower + after: 30.8 i/s - 1.54x slower + before: 29.8 i/s - 1.59x slower + + pull + after(YJIT): 57.1 i/s + before(YJIT): 52.8 i/s - 1.08x slower + after: 35.9 i/s - 1.59x slower + before: 34.5 i/s - 1.66x slower + + stream + after(YJIT): 48.4 i/s + before(YJIT): 46.2 i/s - 1.05x slower + after: 33.5 i/s - 1.44x slower + before: 32.4 i/s - 1.49x slower + +``` + +- YJIT=ON : 1.02x - 1.08x faster +- YJIT=OFF : 1.01x - 1.04x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 7126a12..b66b0ed 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -115,6 +115,7 @@ module REXML + def initialize( source ) + self.stream = source + @listeners = [] ++ @attributes_scanner = StringScanner.new('') + end + + def add_listener( listener ) +@@ -601,7 
+602,8 @@ module REXML + return attributes, closed if raw_attributes.nil? + return attributes, closed if raw_attributes.empty? + +- scanner = StringScanner.new(raw_attributes) ++ @attributes_scanner.string = raw_attributes ++ scanner = @attributes_scanner + until scanner.eos? + if scanner.scan(/\s+/) + break if scanner.eos? +-- +2.43.0 + diff --git a/backport-Reject-unclosed-DOCTYPE-on-parsing-153.patch b/backport-Reject-unclosed-DOCTYPE-on-parsing-153.patch new file mode 100644 index 0000000000000000000000000000000000000000..42404b9668395aecd782e49844c8a6907b1c314c --- /dev/null +++ b/backport-Reject-unclosed-DOCTYPE-on-parsing-153.patch @@ -0,0 +1,101 @@ +From f7040112601104d71d3254a0834c4932b1b68f04 Mon Sep 17 00:00:00 2001 +From: Hiroya Fujinami +Date: Wed, 19 Jun 2024 14:47:34 +0900 +Subject: [PATCH] Reject unclosed DOCTYPE on parsing (#153) + +Fix #152 + +--------- + +Co-authored-by: Sutou Kouhei +--- + .../lib/rexml/parsers/baseparser.rb | 10 +++++++- + .../lib/rexml/parsers/treeparser.rb | 23 ++++++++----------- + 2 files changed, 18 insertions(+), 15 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 18fec5e..4f6b14e 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -205,7 +205,12 @@ module REXML + x, @closed = @closed, nil + return [ :end_element, x ] + end +- return [ :end_document ] if empty? ++ if empty? 
++ if @document_status == :in_doctype ++ raise ParseException.new("Malformed DOCTYPE: unclosed", @source) ++ end ++ return [ :end_document ] ++ end + return @stack.shift if @stack.size > 0 + #STDERR.puts @source.encoding + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" +@@ -355,6 +360,9 @@ module REXML + @document_status = :after_doctype + return [ :end_doctype ] + end ++ if @document_status == :in_doctype ++ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) ++ end + end + if @document_status == :after_doctype + @source.match(/\s*/um, true) +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb +index bf9a425..0cb6f7c 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/treeparser.rb +@@ -16,7 +16,6 @@ module REXML + + def parse + tag_stack = [] +- in_doctype = false + entities = nil + begin + while true +@@ -39,17 +38,15 @@ module REXML + tag_stack.pop + @build_context = @build_context.parent + when :text +- if not in_doctype +- if @build_context[-1].instance_of? Text +- @build_context[-1] << event[1] +- else +- @build_context.add( +- Text.new(event[1], @build_context.whitespace, nil, true) +- ) unless ( +- @build_context.ignore_whitespace_nodes and +- event[1].strip.size==0 +- ) +- end ++ if @build_context[-1].instance_of? 
Text ++ @build_context[-1] << event[1] ++ else ++ @build_context.add( ++ Text.new(event[1], @build_context.whitespace, nil, true) ++ ) unless ( ++ @build_context.ignore_whitespace_nodes and ++ event[1].strip.size==0 ++ ) + end + when :comment + c = Comment.new( event[1] ) +@@ -60,14 +57,12 @@ module REXML + when :processing_instruction + @build_context.add( Instruction.new( event[1], event[2] ) ) + when :end_doctype +- in_doctype = false + entities.each { |k,v| entities[k] = @build_context.entities[k].value } + @build_context = @build_context.parent + when :start_doctype + doctype = DocType.new( event[1..-1], @build_context ) + @build_context = doctype + entities = {} +- in_doctype = true + when :attlistdecl + n = AttlistDecl.new( event[1..-1] ) + @build_context.add( n ) +-- +2.33.0 + + diff --git a/backport-Remove-Source-string-method-117.patch b/backport-Remove-Source-string-method-117.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2f22481f7b371848137b8ddfaa227ba6ede98a2 --- /dev/null +++ b/backport-Remove-Source-string-method-117.patch @@ -0,0 +1,144 @@ +From d146162e9a61574499d10428bc0065754cd26601 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Mon, 4 Mar 2024 05:24:53 +0900 +Subject: [PATCH] Remove `Source#string=` method (#117) + +## Why? + +We want to just change scan pointer. + +https://github.com/ruby/rexml/pull/114#discussion_r1501773803 +> I want to just change scan pointer (`StringScanner#pos=`) instead of +changing `@scanner.string`. 
+--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 23 +++++++++++++---------- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 8 ++++++-- + 2 files changed, 19 insertions(+), 12 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index bc59bcd..c79de0e 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -211,8 +211,9 @@ module REXML + #STDERR.puts @source.encoding + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + if @document_status == nil ++ start_position = @source.position + if @source.match("/um, true)[1] ] +@@ -224,7 +225,7 @@ module REXML + else + message = "#{base_error_message}: invalid name" + end +- @source.string = "/um, true) +@@ -325,7 +327,7 @@ module REXML + else + message = "#{base_error_message}: invalid name" + end +- @source.string = " " + scanner << match_data[1] +- scanner.pos = pos ++ scanner.pos = start_position + closed = !match_data[2].nil? 
+ next + end +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 9eeba27..81d9645 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -76,8 +76,12 @@ module REXML + end + end + +- def string=(string) +- @scanner.string = string ++ def position ++ @scanner.pos ++ end ++ ++ def position=(pos) ++ @scanner.pos = pos + end + + # @return true if the Source is exhausted +-- +2.43.0 + diff --git a/backport-Remove-unnecessary-checks-in-baseparser-112.patch b/backport-Remove-unnecessary-checks-in-baseparser-112.patch new file mode 100644 index 0000000000000000000000000000000000000000..baf27d1a52cda35b2a51df649811fba1cacfb3ad --- /dev/null +++ b/backport-Remove-unnecessary-checks-in-baseparser-112.patch @@ -0,0 +1,49 @@ +From fc6cad570b849692a28f26a963ceb58edc282bbc Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Fri, 16 Feb 2024 04:51:16 +0900 +Subject: [PATCH] Remove unnecessary checks in baseparser (#112) + +## Why + + +https://github.com/ruby/rexml/blob/444c9ce7449d3c5a75ae50087555ec73ae1963a8/lib/rexml/parsers/baseparser.rb#L352-L425 +``` + next_data = @source.buffer + if next_data.size < 2 + @source.read + next_data = @source.buffer + end + if next_data[0] == ?< + : + (omit) + : + else # next_data is a string of one or more characters other than '<'. + md = @source.match( TEXT_PATTERN, true ) # TEXT_PATTERN = /\A([^<]*)/um + text = md[1] + if md[0].length == 0 # md[0].length is greater than or equal to 1. + @source.match( /(\s+)/, true ) + end +``` +This is an unnecessary check because md[0].length is greater than or +equal to 1. 
+--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 3fe5c29..595669c 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -420,9 +420,6 @@ module REXML + else + md = @source.match( TEXT_PATTERN, true ) + text = md[1] +- if md[0].length == 0 +- @source.match( /(\s+)/, true ) +- end + return [ :text, text ] + end + rescue REXML::UndefinedNamespaceException +-- +2.43.0 + diff --git a/backport-Separate-IOSource-ensure_buffer-from-IOSource-match-.patch b/backport-Separate-IOSource-ensure_buffer-from-IOSource-match-.patch new file mode 100644 index 0000000000000000000000000000000000000000..af434d7877fc1ddb90e8b139e2aa88a2ff9056fe --- /dev/null +++ b/backport-Separate-IOSource-ensure_buffer-from-IOSource-match-.patch @@ -0,0 +1,133 @@ +From 77cb0dcf0af1b31acf7fc813315c7c3defac23f8 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Thu, 7 Mar 2024 07:02:34 +0900 +Subject: [PATCH] Separate `IOSource#ensure_buffer` from `IOSource#match`. + (#118) + +## Why? + +It would affect performance to do a read check in `IOSource#match` every +time, Separate read processing from `IOSource#ensure_buffer`. + +Use `IOSource#ensure_buffer` in the following cases where +`@source.buffer` is empty. + +1. at the start of pull_event +2. If a trailing `'>'` pattern matches, as in `@source.match(/\s*>/um)`. 
+ +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.278 10.986 16.430 16.941 i/s - 100.000 times in 9.729858s 9.102574s 6.086579s 5.902885s + sax 30.166 30.496 49.851 51.596 i/s - 100.000 times in 3.315008s 3.279069s 2.005961s 1.938123s + pull 35.459 36.380 60.266 63.134 i/s - 100.000 times in 2.820181s 2.748745s 1.659301s 1.583928s + stream 33.762 34.636 55.173 55.859 i/s - 100.000 times in 2.961948s 2.887131s 1.812485s 1.790218s + +Comparison: + dom + after(YJIT): 16.9 i/s + before(YJIT): 16.4 i/s - 1.03x slower + after: 11.0 i/s - 1.54x slower + before: 10.3 i/s - 1.65x slower + + sax + after(YJIT): 51.6 i/s + before(YJIT): 49.9 i/s - 1.04x slower + after: 30.5 i/s - 1.69x slower + before: 30.2 i/s - 1.71x slower + + pull + after(YJIT): 63.1 i/s + before(YJIT): 60.3 i/s - 1.05x slower + after: 36.4 i/s - 1.74x slower + before: 35.5 i/s - 1.78x slower + + stream + after(YJIT): 55.9 i/s + before(YJIT): 55.2 i/s - 1.01x slower + after: 34.6 i/s - 1.61x slower + before: 33.8 i/s - 1.65x slower + +``` + +- YJIT=ON : 1.01x - 1.05x faster +- YJIT=OFF : 1.01x - 1.06x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 5 +++++ + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 8 +++++++- + 2 files changed, 12 insertions(+), 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index c79de0e..c01b087 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -210,6 +210,8 @@ module REXML + return @stack.shift if @stack.size > 0 + #STDERR.puts @source.encoding + #STDERR.puts "BUFFER = 
#{@source.buffer.inspect}" ++ ++ @source.ensure_buffer + if @document_status == nil + start_position = @source.position + if @source.match("/um, true) + id = [nil, nil, nil] + @document_status = :after_doctype ++ @source.ensure_buffer + else + id = parse_id(base_error_message, + accept_external_id: true, +@@ -248,6 +251,7 @@ module REXML + @document_status = :in_doctype + elsif @source.match(/\s*>/um, true) + @document_status = :after_doctype ++ @source.ensure_buffer + else + message = "#{base_error_message}: garbage after external ID" + raise REXML::ParseException.new(message, @source) +@@ -646,6 +650,7 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + unless scanner.scan(/.*#{Regexp.escape(quote)}/um) ++ @source.ensure_buffer + match_data = @source.match(/^(.*?)(\/)?>/um, true) + if match_data + scanner << "/" if closed +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 81d9645..7f47c2b 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -68,6 +68,9 @@ module REXML + def read + end + ++ def ensure_buffer ++ end ++ + def match(pattern, cons=false) + if cons + @scanner.scan(pattern).nil? ? nil : @scanner +@@ -165,11 +168,14 @@ module REXML + end + end + ++ def ensure_buffer ++ read if @scanner.eos? && @source ++ end ++ + # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats: + # - ">" + # - "XXX>" (X is any string excluding '>') + def match( pattern, cons=false ) +- read if @scanner.eos? 
&& @source + while true + if cons + md = @scanner.scan(pattern) +-- +2.43.0 + diff --git a/backport-Source-read_until-Add-missing-position-move-on-all-r.patch b/backport-Source-read_until-Add-missing-position-move-on-all-r.patch new file mode 100644 index 0000000000000000000000000000000000000000..421dbb545b3515dbb8a3cae263dd6cf5f2f24e6e --- /dev/null +++ b/backport-Source-read_until-Add-missing-position-move-on-all-r.patch @@ -0,0 +1,57 @@ +From 3e3893d48357c04c4f3a7088819880905a64742d Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Sun, 2 Jun 2024 17:07:04 +0900 +Subject: [PATCH] Source#read_until: Add missing position move on all read + +--- + .../gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 2 ++ + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 11 +++++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index d09237c..9d20044 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -633,8 +633,10 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + quote = match[1] ++ start_position = @source.position + value = @source.read_until(quote) + unless value.chomp!(quote) ++ @source.position = start_position + message = "Missing attribute value end quote: <#{name}>: <#{quote}>" + raise REXML::ParseException.new(message, @source) + end +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 999f467..3be3f84 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -69,7 +69,12 @@ module REXML + end + + def read_until(term) +- @scanner.scan_until(/#{Regexp.escape(term)}/) or @scanner.rest ++ data = @scanner.scan_until(/#{Regexp.escape(term)}/) ++ unless data ++ data = @scanner.rest ++ @scanner.pos = 
@scanner.string.bytesize ++ end ++ data + end + + def ensure_buffer +@@ -181,7 +186,9 @@ module REXML + @scanner << readline(term) + end + rescue EOFError +- @scanner.rest ++ rest = @scanner.rest ++ @scanner.pos = @scanner.string.bytesize ++ rest + else + read if @scanner.eos? and !@source.eof? + str +-- +2.43.0 + diff --git a/backport-Suppress-a-warning.patch b/backport-Suppress-a-warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a53092fa0051b698d876d62d673ce1bd134def1 --- /dev/null +++ b/backport-Suppress-a-warning.patch @@ -0,0 +1,24 @@ +From 94e180e939baff8f7e328a287bb96ebbd99db6eb Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Thu, 16 May 2024 14:30:35 +0900 +Subject: [PATCH] Suppress a warning + +--- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 999751b..0f3c501 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -174,7 +174,6 @@ module REXML + + def read_until(term) + pattern = Regexp.union(term) +- data = [] + begin + until str = @scanner.scan_until(pattern) + @scanner << readline(term) +-- +2.43.0 + diff --git a/backport-Use-Regexp.escape-instead-of-Regexp.union.patch b/backport-Use-Regexp.escape-instead-of-Regexp.union.patch new file mode 100644 index 0000000000000000000000000000000000000000..f3a7809ad99ecfa2d54a84ae7c018b754d863ed1 --- /dev/null +++ b/backport-Use-Regexp.escape-instead-of-Regexp.union.patch @@ -0,0 +1,35 @@ +From f525ef79367e70b041763c2a6c332628b3f85e48 Mon Sep 17 00:00:00 2001 +From: Sutou Kouhei +Date: Thu, 30 May 2024 20:56:26 +0900 +Subject: [PATCH] Use /#{Regexp.escape}/ instead of Regexp.union + +It's for readability. 
+--- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 0f3c501..4483aec 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -69,7 +69,7 @@ module REXML + end + + def read_until(term) +- @scanner.scan_until(Regexp.union(term)) or @scanner.rest ++ @scanner.scan_until(/#{Regexp.escape(term)}/) or @scanner.rest + end + + def ensure_buffer +@@ -173,7 +173,7 @@ module REXML + end + + def read_until(term) +- pattern = Regexp.union(term) ++ pattern = /#{Regexp.escape(term)}/ + begin + until str = @scanner.scan_until(pattern) + @scanner << readline(term) +-- +2.43.0 + diff --git a/backport-Use-more-StringScanner-based-API-to-parse-XML-114.patch b/backport-Use-more-StringScanner-based-API-to-parse-XML-114.patch new file mode 100644 index 0000000000000000000000000000000000000000..c9789548c30b290c03e9182736d8d6240416ed78 --- /dev/null +++ b/backport-Use-more-StringScanner-based-API-to-parse-XML-114.patch @@ -0,0 +1,556 @@ +From 370666e314816b57ecd5878e757224c3b6bc93f5 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Tue, 27 Feb 2024 09:48:35 +0900 +Subject: [PATCH] Use more StringScanner based API to parse XML (#114) + +## Why? + +Improve maintainability by optimizing the process so that the parsing +process proceeds using StringScanner#scan. + +## Changed +- Change `REXML::Parsers::BaseParser` from `frozen_string_literal: +false` to `frozen_string_literal: true`. +- Added `Source#string=` method for error message output. +- Added TestParseDocumentTypeDeclaration#test_no_name test case. 
+- Of the `intSubset` of DOCTYPE, " +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 325 ++++++++++--------- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 31 +- + 2 files changed, 188 insertions(+), 168 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 595669c..bc59bcd 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -1,4 +1,4 @@ +-# frozen_string_literal: false ++# frozen_string_literal: true + require_relative '../parseexception' + require_relative '../undefinednamespaceexception' + require_relative '../source' +@@ -112,6 +112,19 @@ module REXML + "apos" => [/'/, "'", "'", /'/] + } + ++ module Private ++ INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um ++ TAG_PATTERN = /((?>#{QNAME_STR}))/um ++ CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ++ ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um ++ NAME_PATTERN = /\s*#{NAME}/um ++ GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" ++ PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" ++ ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um ++ end ++ private_constant :Private ++ include Private ++ + def initialize( source ) + self.stream = source + @listeners = [] +@@ -198,183 +211,172 @@ module REXML + #STDERR.puts @source.encoding + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" + if @document_status == nil +- word = @source.match( /\A((?:\s+)|(?:<[^>]*>))/um ) +- word = word[1] unless word.nil? +- #STDERR.puts "WORD = #{word.inspect}" +- case word +- when COMMENT_START +- return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ] +- when XMLDECL_START +- #STDERR.puts "XMLDECL" +- results = @source.match( XMLDECL_PATTERN, true )[1] +- version = VERSION.match( results ) +- version = version[1] unless version.nil? 
+- encoding = ENCODING.match(results) +- encoding = encoding[1] unless encoding.nil? +- if need_source_encoding_update?(encoding) +- @source.encoding = encoding +- end +- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding +- encoding = "UTF-16" +- end +- standalone = STANDALONE.match(results) +- standalone = standalone[1] unless standalone.nil? +- return [ :xmldecl, version, encoding, standalone ] +- when INSTRUCTION_START ++ if @source.match("/um, true) +- id = [nil, nil, nil] +- @document_status = :after_doctype +- else +- id = parse_id(base_error_message, +- accept_external_id: true, +- accept_public_id: false) +- if id[0] == "SYSTEM" +- # For backward compatibility +- id[1], id[2] = id[2], nil ++ elsif @source.match("/um, true)[1] ] ++ elsif @source.match("DOCTYPE", true) ++ base_error_message = "Malformed DOCTYPE" ++ unless @source.match(/\s+/um, true) ++ if @source.match(">") ++ message = "#{base_error_message}: name is missing" ++ else ++ message = "#{base_error_message}: invalid name" ++ end ++ @source.string = "/um, true) ++ elsif @source.match(/\s*>/um, true) ++ id = [nil, nil, nil] + @document_status = :after_doctype + else +- message = "#{base_error_message}: garbage after external ID" +- raise REXML::ParseException.new(message, @source) ++ id = parse_id(base_error_message, ++ accept_external_id: true, ++ accept_public_id: false) ++ if id[0] == "SYSTEM" ++ # For backward compatibility ++ id[1], id[2] = id[2], nil ++ end ++ if @source.match(/\s*\[/um, true) ++ @document_status = :in_doctype ++ elsif @source.match(/\s*>/um, true) ++ @document_status = :after_doctype ++ else ++ message = "#{base_error_message}: garbage after external ID" ++ raise REXML::ParseException.new(message, @source) ++ end + end +- end +- args = [:start_doctype, name, *id] +- if @document_status == :after_doctype +- @source.match(/\A\s*/um, true) +- @stack << [ :end_doctype ] +- end +- return args +- when /\A\s+/ +- else +- @document_status = :after_doctype +- if 
@source.encoding == "UTF-8" +- @source.buffer_encoding = ::Encoding::UTF_8 ++ args = [:start_doctype, name, *id] ++ if @document_status == :after_doctype ++ @source.match(/\s*/um, true) ++ @stack << [ :end_doctype ] ++ end ++ return args ++ else ++ message = "Invalid XML" ++ raise REXML::ParseException.new(message, @source) + end + end + end + if @document_status == :in_doctype +- md = @source.match(/\A\s*(.*?>)/um) +- case md[1] +- when SYSTEMENTITY +- match = @source.match( SYSTEMENTITY, true )[1] +- return [ :externalentity, match ] +- +- when ELEMENTDECL_START +- return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] +- +- when ENTITY_START +- match = [:entitydecl, *@source.match( ENTITYDECL, true ).captures.compact] +- ref = false +- if match[1] == '%' +- ref = true +- match.delete_at 1 +- end +- # Now we have to sort out what kind of entity reference this is +- if match[2] == 'SYSTEM' +- # External reference +- match[3] = match[3][1..-2] # PUBID +- match.delete_at(4) if match.size > 4 # Chop out NDATA decl +- # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] +- elsif match[2] == 'PUBLIC' +- # External reference +- match[3] = match[3][1..-2] # PUBID +- match[4] = match[4][1..-2] # HREF +- match.delete_at(5) if match.size > 5 # Chop out NDATA decl +- # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] +- else +- match[2] = match[2][1..-2] +- match.pop if match.size == 4 +- # match is [ :entity, name, value ] +- end +- match << '%' if ref +- return match +- when ATTLISTDECL_START +- md = @source.match( ATTLISTDECL_PATTERN, true ) +- raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? +- element = md[1] +- contents = md[0] +- +- pairs = {} +- values = md[0].scan( ATTDEF_RE ) +- values.each do |attdef| +- unless attdef[3] == "#IMPLIED" +- attdef.compact! 
+- val = attdef[3] +- val = attdef[4] if val == "#FIXED " +- pairs[attdef[0]] = val +- if attdef[0] =~ /^xmlns:(.*)/ +- @nsstack[0] << $1 +- end ++ @source.match(/\s*/um, true) # skip spaces ++ if @source.match("/um, true) ++ raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? ++ return [ :elementdecl, "/um) +- message = "#{base_error_message}: name is missing" ++ # Now we have to sort out what kind of entity reference this is ++ if match[2] == 'SYSTEM' ++ # External reference ++ match[3] = match[3][1..-2] # PUBID ++ match.delete_at(4) if match.size > 4 # Chop out NDATA decl ++ # match is [ :entity, name, SYSTEM, pubid(, ndata)? ] ++ elsif match[2] == 'PUBLIC' ++ # External reference ++ match[3] = match[3][1..-2] # PUBID ++ match[4] = match[4][1..-2] # HREF ++ match.delete_at(5) if match.size > 5 # Chop out NDATA decl ++ # match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ] + else +- message = "#{base_error_message}: invalid declaration name" ++ match[2] = match[2][1..-2] ++ match.pop if match.size == 4 ++ # match is [ :entity, name, value ] + end +- raise REXML::ParseException.new(message, @source) +- end +- name = parse_name(base_error_message) +- id = parse_id(base_error_message, +- accept_external_id: true, +- accept_public_id: true) +- unless @source.match(/\A\s*>/um, true) +- message = "#{base_error_message}: garbage before end >" +- raise REXML::ParseException.new(message, @source) ++ match << '%' if ref ++ return match ++ elsif @source.match("ATTLIST", true) ++ md = @source.match(ATTLISTDECL_END, true) ++ raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? ++ element = md[1] ++ contents = md[0] ++ ++ pairs = {} ++ values = md[0].scan( ATTDEF_RE ) ++ values.each do |attdef| ++ unless attdef[3] == "#IMPLIED" ++ attdef.compact! 
++ val = attdef[3] ++ val = attdef[4] if val == "#FIXED " ++ pairs[attdef[0]] = val ++ if attdef[0] =~ /^xmlns:(.*)/ ++ @nsstack[0] << $1 ++ end ++ end ++ end ++ return [ :attlistdecl, element, pairs, contents ] ++ elsif @source.match("NOTATION", true) ++ base_error_message = "Malformed notation declaration" ++ unless @source.match(/\s+/um, true) ++ if @source.match(">") ++ message = "#{base_error_message}: name is missing" ++ else ++ message = "#{base_error_message}: invalid name" ++ end ++ @source.string = " /um, true) ++ message = "#{base_error_message}: garbage before end >" ++ raise REXML::ParseException.new(message, @source) ++ end ++ return [:notationdecl, name, *id] ++ elsif md = @source.match(/--(.*?)-->/um, true) ++ case md[1] ++ when /--/, /-\z/ ++ raise REXML::ParseException.new("Malformed comment", @source) ++ end ++ return [ :comment, md[1] ] if md + end +- return [:notationdecl, name, *id] +- when DOCTYPE_END ++ elsif match = @source.match(/(%.*?;)\s*/um, true) ++ return [ :externalentity, match[1] ] ++ elsif @source.match(/\]\s*>/um, true) + @document_status = :after_doctype +- @source.match( DOCTYPE_END, true ) + return [ :end_doctype ] + end + end + if @document_status == :after_doctype +- @source.match(/\A\s*/um, true) ++ @source.match(/\s*/um, true) + end + begin +- next_data = @source.buffer +- if next_data.size < 2 +- @source.read +- next_data = @source.buffer +- end +- if next_data[0] == ?< +- if next_data[1] == ?/ ++ if @source.match("<", true) ++ if @source.match("/", true) + @nsstack.shift + last_tag = @tags.pop +- md = @source.match( CLOSE_MATCH, true ) ++ md = @source.match(CLOSE_PATTERN, true) + if md and !last_tag + message = "Unexpected top-level end tag (got '#{md[1]}')" + raise REXML::ParseException.new(message, @source) + end + if md.nil? 
or last_tag != md[1] + message = "Missing end tag for '#{last_tag}'" +- message << " (got '#{md[1]}')" if md ++ message += " (got '#{md[1]}')" if md ++ @source.string = "]*>)/um) ++ elsif @source.match("!", true) ++ md = @source.match(/([^>]*>)/um) + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + raise REXML::ParseException.new("Malformed node", @source) unless md +- if md[0][2] == ?- +- md = @source.match( COMMENT_PATTERN, true ) ++ if md[0][0] == ?- ++ md = @source.match(/--(.*?)-->/um, true) + + case md[1] + when /--/, /-\z/ +@@ -383,17 +385,18 @@ module REXML + + return [ :comment, md[1] ] if md + else +- md = @source.match( CDATA_PATTERN, true ) ++ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) + return [ :cdata, md[1] ] if md + end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) +- elsif next_data[1] == ?? ++ elsif @source.match("?", true) + return process_instruction + else + # Get the next tag +- md = @source.match(TAG_MATCH, true) ++ md = @source.match(TAG_PATTERN, true) + unless md ++ @source.string = "<" + @source.buffer + raise REXML::ParseException.new("malformed XML: missing tag start", @source) + end + tag = md[1] +@@ -418,7 +421,7 @@ module REXML + return [ :start_element, tag, attributes ] + end + else +- md = @source.match( TEXT_PATTERN, true ) ++ md = @source.match(/([^<]*)/um, true) + text = md[1] + return [ :text, text ] + end +@@ -462,8 +465,7 @@ module REXML + + # Unescapes all possible entities + def unnormalize( string, entities=nil, filter=nil ) +- rv = string.clone +- rv.gsub!( /\r\n?/, "\n" ) ++ rv = string.gsub( /\r\n?/, "\n" ) + matches = rv.scan( REFERENCE_RE ) + return rv if matches.size == 0 + rv.gsub!( /�*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { +@@ -498,9 +500,9 @@ module REXML + end + + def parse_name(base_error_message) +- md = @source.match(/\A\s*#{NAME}/um, true) ++ md = @source.match(NAME_PATTERN, true) + unless md +- if 
@source.match(/\A\s*\S/um) ++ if @source.match(/\s*\S/um) + message = "#{base_error_message}: invalid name" + else + message = "#{base_error_message}: name is missing" +@@ -577,11 +579,28 @@ module REXML + end + + def process_instruction +- match_data = @source.match(INSTRUCTION_PATTERN, true) ++ match_data = @source.match(INSTRUCTION_END, true) + unless match_data + message = "Invalid processing instruction node" ++ @source.string = " +Date: Sun, 21 Jan 2024 20:02:00 +0900 +Subject: [PATCH] Use `@scanner << readline` instead of `@scanner.string = + @scanner.rest + readline` (#107) + +## Why + +JRuby's `StringScanner#<<` and `StringScanner#scan` OutOfMemoryError has +been resolved in strscan gem 3.0.9. + +https://github.com/ruby/strscan/issues/83 + +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.958 11.044 16.615 16.783 i/s - 100.000 times in 9.126104s 9.055023s 6.018799s 5.958437s + sax 29.624 29.609 44.390 45.370 i/s - 100.000 times in 3.375641s 3.377372s 2.252774s 2.204080s + pull 33.868 34.695 51.173 53.492 i/s - 100.000 times in 2.952679s 2.882229s 1.954138s 1.869422s + stream 31.719 32.351 43.604 45.403 i/s - 100.000 times in 3.152713s 3.091052s 2.293356s 2.202514s + +Comparison: + dom + after(YJIT): 16.8 i/s + before(YJIT): 16.6 i/s - 1.01x slower + after: 11.0 i/s - 1.52x slower + before: 11.0 i/s - 1.53x slower + + sax + after(YJIT): 45.4 i/s + before(YJIT): 44.4 i/s - 1.02x slower + before: 29.6 i/s - 1.53x slower + after: 29.6 i/s - 1.53x slower + + pull + after(YJIT): 53.5 i/s + before(YJIT): 51.2 i/s - 1.05x slower + after: 34.7 i/s - 1.54x slower + before: 33.9 i/s - 1.58x slower + + stream + after(YJIT): 45.4 i/s + before(YJIT): 43.6 i/s - 1.04x slower + 
after: 32.4 i/s - 1.40x slower + before: 31.7 i/s - 1.43x slower + +``` + +- YJIT=ON : 1.01x - 1.05x faster +- YJIT=OFF : 1.00x - 1.02x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 6 +----- + 1 files changed, 1 insertions(+), 5 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 71b08f9..db78a12 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -149,11 +149,7 @@ module REXML + + def read + begin +- # NOTE: `@scanner << readline` does not free memory, so when parsing huge XML in JRuby's DOM, +- # out-of-memory error `Java::JavaLang::OutOfMemoryError: Java heap space` occurs. +- # `@scanner.string = @scanner.rest + readline` frees memory that is already consumed +- # and avoids this problem. +- @scanner.string = @scanner.rest + readline ++ @scanner << readline + rescue Exception, NameError + @source = nil + end +-- +2.43.0 + diff --git a/backport-Use-string-scanner-with-baseparser-105.patch b/backport-Use-string-scanner-with-baseparser-105.patch new file mode 100644 index 0000000000000000000000000000000000000000..1debfcc5f51a2b1c0014b69e0783a9c2d3011dbb --- /dev/null +++ b/backport-Use-string-scanner-with-baseparser-105.patch @@ -0,0 +1,387 @@ +From 810d2285235d5501a0a124f300832e6e9515da3c Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Wed, 17 Jan 2024 15:32:57 +0900 +Subject: [PATCH] Use string scanner with baseparser (#105) + +Using StringScanner reduces the string copying process and speeds up the +process. + +And I removed unnecessary methods. 
+ + +https://github.com/ruby/rexml/actions/runs/7549990000/job/20554906140?pr=105 + +``` +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [x86_64-linux] +Calculating ------------------------------------- + rexml 3.2.6 master 3.2.6(YJIT) master(YJIT) + dom 4.868 5.077 8.137 8.303 i/s - 100.000 times in 20.540529s 19.696590s 12.288900s 12.043666s + sax 13.597 13.953 19.206 20.948 i/s - 100.000 times in 7.354343s 7.167142s 5.206745s 4.773765s + pull 15.641 16.918 22.266 25.378 i/s - 100.000 times in 6.393424s 5.910955s 4.491201s 3.940471s + stream 14.339 15.844 19.810 22.206 i/s - 100.000 times in 6.973856s 6.311350s 5.047957s 4.503244s + +Comparison: + dom + master(YJIT): 8.3 i/s + 3.2.6(YJIT): 8.1 i/s - 1.02x slower + master: 5.1 i/s - 1.64x slower + rexml 3.2.6: 4.9 i/s - 1.71x slower + + sax + master(YJIT): 20.9 i/s + 3.2.6(YJIT): 19.2 i/s - 1.09x slower + master: 14.0 i/s - 1.50x slower + rexml 3.2.6: 13.6 i/s - 1.54x slower + + pull + master(YJIT): 25.4 i/s + 3.2.6(YJIT): 22.3 i/s - 1.14x slower + master: 16.9 i/s - 1.50x slower + rexml 3.2.6: 15.6 i/s - 1.62x slower + + stream + master(YJIT): 22.2 i/s + 3.2.6(YJIT): 19.8 i/s - 1.12x slower + master: 15.8 i/s - 1.40x slower + rexml 3.2.6: 14.3 i/s - 1.55x slower +``` + +- YJIT=ON : 1.02x - 1.14x faster +- YJIT=OFF : 1.02x - 1.10x faster + +--------- + +Co-authored-by: Sutou Kouhei +--- + .bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 21 ++-- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 149 ++++++++------------------ + 2 files changed, 56 insertions(+), 114 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index 305b120..65bad26 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -96,7 +96,7 @@ module REXML + ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))" + PEDECL = "" + GEDECL = "" +- ENTITYDECL = 
/\s*(?:#{GEDECL})|(?:#{PEDECL})/um ++ ENTITYDECL = /\s*(?:#{GEDECL})|\s*(?:#{PEDECL})/um + + NOTATIONDECL_START = /\A\s*0 +- rv +- end +- + def read + end + +- def consume( pattern ) +- @buffer = $' if pattern.match( @buffer ) +- end +- +- def match_to( char, pattern ) +- return pattern.match(@buffer) +- end +- +- def match_to_consume( char, pattern ) +- md = pattern.match(@buffer) +- @buffer = $' +- return md +- end +- + def match(pattern, cons=false) +- md = pattern.match(@buffer) +- @buffer = $' if cons and md +- return md ++ if cons ++ @scanner.scan(pattern).nil? ? nil : @scanner ++ else ++ @scanner.check(pattern).nil? ? nil : @scanner ++ end + end + + # @return true if the Source is exhausted + def empty? +- @buffer == "" +- end +- +- def position +- @orig.index( @buffer ) ++ @scanner.eos? + end + + # @return the current line in the source + def current_line + lines = @orig.split +- res = lines.grep @buffer[0..30] ++ res = lines.grep @scanner.rest[0..30] + res = res[-1] if res.kind_of? 
Array + lines.index( res ) if res + end + + private ++ + def detect_encoding +- buffer_encoding = @buffer.encoding ++ scanner_encoding = @scanner.rest.encoding + detected_encoding = "UTF-8" + begin +- @buffer.force_encoding("ASCII-8BIT") +- if @buffer[0, 2] == "\xfe\xff" +- @buffer[0, 2] = "" ++ @scanner.string.force_encoding("ASCII-8BIT") ++ if @scanner.scan(/\xfe\xff/n) + detected_encoding = "UTF-16BE" +- elsif @buffer[0, 2] == "\xff\xfe" +- @buffer[0, 2] = "" ++ elsif @scanner.scan(/\xff\xfe/n) + detected_encoding = "UTF-16LE" +- elsif @buffer[0, 3] == "\xef\xbb\xbf" +- @buffer[0, 3] = "" ++ elsif @scanner.scan(/\xef\xbb\xbf/n) + detected_encoding = "UTF-8" + end + ensure +- @buffer.force_encoding(buffer_encoding) ++ @scanner.string.force_encoding(scanner_encoding) + end + self.encoding = detected_encoding + end + + def encoding_updated + if @encoding != 'UTF-8' +- @buffer = decode(@buffer) ++ @scanner.string = decode(@scanner.rest) + @to_utf = true + else + @to_utf = false +- @buffer.force_encoding ::Encoding::UTF_8 ++ @scanner.string.force_encoding(::Encoding::UTF_8) + end + end + end +@@ -172,7 +138,7 @@ module REXML + end + + if !@to_utf and +- @buffer.respond_to?(:force_encoding) and ++ @orig.respond_to?(:force_encoding) and + @source.respond_to?(:external_encoding) and + @source.external_encoding != ::Encoding::UTF_8 + @force_utf8 = true +@@ -181,65 +147,44 @@ module REXML + end + end + +- def scan(pattern, cons=false) +- rv = super +- # You'll notice that this next section is very similar to the same +- # section in match(), but just a liiittle different. This is +- # because it is a touch faster to do it this way with scan() +- # than the way match() does it; enough faster to warrant duplicating +- # some code +- if rv.size == 0 +- until @buffer =~ pattern or @source.nil? 
+- begin +- @buffer << readline +- rescue Iconv::IllegalSequence +- raise +- rescue +- @source = nil +- end +- end +- rv = super +- end +- rv.taint if RUBY_VERSION < '2.7' +- rv +- end +- + def read + begin +- @buffer << readline ++ # NOTE: `@scanner << readline` does not free memory, so when parsing huge XML in JRuby's DOM, ++ # out-of-memory error `Java::JavaLang::OutOfMemoryError: Java heap space` occurs. ++ # `@scanner.string = @scanner.rest + readline` frees memory that is already consumed ++ # and avoids this problem. ++ @scanner.string = @scanner.rest + readline + rescue Exception, NameError + @source = nil + end + end + +- def consume( pattern ) +- match( pattern, true ) +- end +- + def match( pattern, cons=false ) +- rv = pattern.match(@buffer) +- @buffer = $' if cons and rv +- while !rv and @source ++ if cons ++ md = @scanner.scan(pattern) ++ else ++ md = @scanner.check(pattern) ++ end ++ while md.nil? and @source + begin +- @buffer << readline +- rv = pattern.match(@buffer) +- @buffer = $' if cons and rv ++ @scanner << readline ++ if cons ++ md = @scanner.scan(pattern) ++ else ++ md = @scanner.check(pattern) ++ end + rescue + @source = nil + end + end +- rv.taint if RUBY_VERSION < '2.7' +- rv ++ ++ md.nil? ? nil : @scanner + end + + def empty? + super and ( @source.nil? || @source.eof? 
) + end + +- def position +- @er_source.pos rescue 0 +- end +- + # @return the current line in the source + def current_line + begin +@@ -290,7 +235,7 @@ module REXML + @source.set_encoding(@encoding, @encoding) + end + @line_break = encode(">") +- @pending_buffer, @buffer = @buffer, "" ++ @pending_buffer, @scanner.string = @scanner.rest, "" + @pending_buffer.force_encoding(@encoding) + super + end +-- +2.43.0 + diff --git a/backport-source-Remove-unnecessary-string-length-comparisons-.patch b/backport-source-Remove-unnecessary-string-length-comparisons-.patch new file mode 100644 index 0000000000000000000000000000000000000000..15c2a2e977574ec798ab418386fb0199bcf9bd2c --- /dev/null +++ b/backport-source-Remove-unnecessary-string-length-comparisons-.patch @@ -0,0 +1,99 @@ +From 19975fea162ca5b31ac8218087ea2924aee90e5d Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sun, 3 Mar 2024 18:36:34 +0900 +Subject: [PATCH] source: Remove unnecessary string length comparisons in the + case of string comparisons (#116) + +## Why + +https://github.com/ruby/rexml/blob/370666e314816b57ecd5878e757224c3b6bc93f5/lib/rexml/source.rb#L208-L234 + +Because `@line_break = encode(">")`, the end of `@scanner << readline` +is one of the following. + +1. ">" +2. "X>" +3. "X" (eof) + +This will not be matched by additional reads in the following cases. 
+ +- `@source.match(">")` +- `@source.match(">X")` + +## Benchmark + +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 10.689 10.736 18.484 18.108 i/s - 100.000 times in 9.355754s 9.314792s 5.409984s 5.522527s + sax 30.793 31.583 52.965 52.641 i/s - 100.000 times in 3.247486s 3.166258s 1.888036s 1.899660s + pull 36.308 37.182 63.773 64.669 i/s - 100.000 times in 2.754203s 2.689440s 1.568069s 1.546325s + stream 34.936 35.991 56.830 57.729 i/s - 100.000 times in 2.862361s 2.778467s 1.759632s 1.732238s + +Comparison: + dom + before(YJIT): 18.5 i/s + after(YJIT): 18.1 i/s - 1.02x slower + after: 10.7 i/s - 1.72x slower + before: 10.7 i/s - 1.73x slower + + sax + before(YJIT): 53.0 i/s + after(YJIT): 52.6 i/s - 1.01x slower + after: 31.6 i/s - 1.68x slower + before: 30.8 i/s - 1.72x slower + + pull + after(YJIT): 64.7 i/s + before(YJIT): 63.8 i/s - 1.01x slower + after: 37.2 i/s - 1.74x slower + before: 36.3 i/s - 1.78x slower + + stream + after(YJIT): 57.7 i/s + before(YJIT): 56.8 i/s - 1.02x slower + after: 36.0 i/s - 1.60x slower + before: 34.9 i/s - 1.65x slower +``` + +- YJIT=ON : 0.98x - 1.02x faster +- YJIT=OFF : 1.00x - 1.03x faster +--- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 4111d1d..9eeba27 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -161,6 +161,9 @@ module REXML + end + end + ++ # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats: ++ # - ">" ++ # - "XXX>" (X is any string excluding '>') + 
def match( pattern, cons=false ) + read if @scanner.eos? && @source + while true +@@ -170,7 +173,7 @@ module REXML + md = @scanner.check(pattern) + end + break if md +- return nil if pattern.is_a?(String) && pattern.bytesize <= @scanner.rest_size ++ return nil if pattern.is_a?(String) + return nil if @source.nil? + return nil unless read + end +-- +2.43.0 + diff --git a/ruby.spec b/ruby.spec index d8eba7cd6285e7414dbc474427c7e819428ea275..fdf03dc5146a350c5de34c880633dd989b047f34 100644 --- a/ruby.spec +++ b/ruby.spec @@ -33,7 +33,7 @@ Name: ruby Version: %{ruby_version} -Release: 135 +Release: 136 Summary: Object-oriented scripting language interpreter License: (Ruby or BSD) and Public Domain and MIT and CC0 and zlib and UCD URL: https://www.ruby-lang.org/en/ @@ -200,6 +200,28 @@ Patch6028: backport-0002-CVE-2024-35221.patch Patch6029: backport-0003-CVE-2024-35221.patch Patch6030: backport-0004-CVE-2024-35221.patch Patch6031: backport-0005-CVE-2024-35221.patch +Patch6032: backport-Use-string-scanner-with-baseparser-105.patch +Patch6033: backport-Reduce-calls-to-Source-buffer-StringScanner-rest-106.patch +Patch6034: backport-Use-scanner-readline-instead-of-scanner.string-scann.patch +Patch6035: backport-Reduce-calls-to-StringScanner.new-108.patch +Patch6036: backport-Change-loop-in-parse_attributes-to-while-true-.-109.patch.patch +Patch6037: backport-Remove-unnecessary-checks-in-baseparser-112.patch +Patch6038: backport-Use-more-StringScanner-based-API-to-parse-XML-114.patch +Patch6039: backport-source-Remove-unnecessary-string-length-comparisons-.patch +Patch6040: backport-Remove-Source-string-method-117.patch +Patch6041: backport-Separate-IOSource-ensure_buffer-from-IOSource-match-.patch +Patch6042: backport-Optimize-the-parse_attributes-method-to-use-Source-m.patch +Patch6043: backport-Change-attribute.has_key-name-to-attributes-name-.-1.patch +Patch6044: backport-CVE-2024-35176.patch +Patch6045: backport-Suppress-a-warning.patch +Patch6046: 
backport-Use-Regexp.escape-instead-of-Regexp.union.patch +Patch6047: backport-Add-missing-encode-for-custom-term.patch +Patch6048: backport-Source-read_until-Add-missing-position-move-on-all-r.patch +Patch6049: backport-Optimize-Source-read_until-method-135.patch +Patch6050: backport-Improve-text-parse-performance.patch +Patch6051: backport-Reject-unclosed-DOCTYPE-on-parsing-153.patch +Patch6052: backport-Fix-a-bug-that-a-large-XML-can-t-be-parsed-154.patch +Patch6053: backport-Don-t-include-private_constant-ed-module-155.patch Provides: %{name}-libs = %{version}-%{release} Obsoletes: %{name}-libs < %{version}-%{release} @@ -1198,6 +1220,9 @@ make runruby TESTRUN_SCRIPT=%{SOURCE13} %doc %{gem_dir}/gems/typeprof-%{typeprof_version}/testbed %changelog +* Sat Jun 22 2024 shixuantong - 3.0.3-136 +- fix CVE-2024-35176 + * Tue Jun 18 2024 shixuantong - 3.0.3-135 - fix CVE-2024-35221