Bron bekijken

LibRegex: Generate negated property escapes as a single instruction

These were previously generated as two instructions, Compare [Inverse]
and Compare [Property].
Timothy Flynn 4 jaar geleden
bovenliggende
commit
dc9f516339
2 gewijzigde bestanden met 16 toevoegingen en 3 verwijderingen
  1. 10 0
      Tests/LibRegex/Regex.cpp
  2. 6 3
      Userland/Libraries/LibRegex/RegexParser.cpp

+ 10 - 0
Tests/LibRegex/Regex.cpp

@@ -656,17 +656,27 @@ TEST_CASE(ECMA262_property_match)
         { "\\p{ASCII}", "p{ASCII}", true },
         { "\\p{ASCII}", "a", true, ECMAScriptFlags::Unicode },
         { "\\p{ASCII}", "😀", false, ECMAScriptFlags::Unicode },
+        { "\\P{ASCII}", "a", false, ECMAScriptFlags::Unicode },
+        { "\\P{ASCII}", "😀", true, ECMAScriptFlags::Unicode },
         { "\\p{ASCII_Hex_Digit}", "1", true, ECMAScriptFlags::Unicode },
         { "\\p{ASCII_Hex_Digit}", "a", true, ECMAScriptFlags::Unicode },
         { "\\p{ASCII_Hex_Digit}", "x", false, ECMAScriptFlags::Unicode },
+        { "\\P{ASCII_Hex_Digit}", "1", false, ECMAScriptFlags::Unicode },
+        { "\\P{ASCII_Hex_Digit}", "a", false, ECMAScriptFlags::Unicode },
+        { "\\P{ASCII_Hex_Digit}", "x", true, ECMAScriptFlags::Unicode },
         { "\\p{Any}", "\xcd\xb8", true, ECMAScriptFlags::Unicode },       // U+0378, which is an unassigned code point.
+        { "\\P{Any}", "\xcd\xb8", false, ECMAScriptFlags::Unicode },      // U+0378, which is an unassigned code point.
         { "\\p{Assigned}", "\xcd\xb8", false, ECMAScriptFlags::Unicode }, // U+0378, which is an unassigned code point.
+        { "\\P{Assigned}", "\xcd\xb8", true, ECMAScriptFlags::Unicode },  // U+0378, which is an unassigned code point.
         { "\\p{Lu}", "a", false, ECMAScriptFlags::Unicode },
         { "\\p{Lu}", "A", true, ECMAScriptFlags::Unicode },
         { "\\p{Lu}", "9", false, ECMAScriptFlags::Unicode },
         { "\\p{Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
         { "\\p{Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
         { "\\p{Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
+        { "\\P{Cased_Letter}", "a", false, ECMAScriptFlags::Unicode },
+        { "\\P{Cased_Letter}", "A", false, ECMAScriptFlags::Unicode },
+        { "\\P{Cased_Letter}", "9", true, ECMAScriptFlags::Unicode },
         { "\\p{General_Category=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
         { "\\p{General_Category=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
         { "\\p{General_Category=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },

+ 6 - 3
Userland/Libraries/LibRegex/RegexParser.cpp

@@ -1546,15 +1546,18 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
         bool negated = false;
 
         if (parse_unicode_property_escape(property, negated)) {
+            Vector<CompareTypeAndValuePair> compares;
             if (negated)
-                stack.insert_bytecode_compare_values({ { CharacterCompareType::Inverse, 0 } });
+                compares.empend(CompareTypeAndValuePair { CharacterCompareType::Inverse, 0 });
             property.visit(
                 [&](Unicode::Property property) {
-                    stack.insert_bytecode_compare_values({ { CharacterCompareType::Property, (ByteCodeValueType)(property) } });
+                    compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)property });
                 },
                 [&](Unicode::GeneralCategory general_category) {
-                    stack.insert_bytecode_compare_values({ { CharacterCompareType::GeneralCategory, (ByteCodeValueType)(general_category) } });
+                    compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
                 });
+            stack.insert_bytecode_compare_values(move(compares));
+            match_length_minimum += 1;
             return true;
         }
     }