英文:
Troubleshooting Regular Expressions in MediaWiki Extension Not Working
问题
正则表达式功能在我正在构建的扩展的onParserBeforePreprocess函数中不起作用,而我不知道原因。
让我详细说明onParserBeforePreprocess函数不起作用的问题。
extension.json:
{
"name": "EnhanceMarkup",
"description": "提供增强标记功能",
"version": "1.0",
"author": [
"Jeong Gaon"
],
"url": "https://www.gaon.xyz/mw_extensions",
"type": "other",
"license-name": "Apache-2.0",
"AutoloadClasses": {
"EnhanceMarkupHooks": "includes/EnhanceMarkupHooks.php"
},
"ResourceModules": {
"ext.EnhanceMarkup.styles": {
"styles": "resources/ext.EnhanceMarkup.styles.css",
"localBasePath": "",
"remoteExtPath": "EnhanceMarkup"
},
"ext.EnhanceMarkup.scripts": {
"scripts": ["resources/ext.EnhanceMarkup.scripts.js", "resources/lib/math.js"],
"localBasePath": "",
"remoteExtPath": "EnhanceMarkup"
}
},
"Hooks": {
"InternalParseBeforeLinks": "EnhanceMarkupHooks::onInternalParseBeforeLinks",
"ParserFirstCallInit": "EnhanceMarkupHooks::onParserFirstCallInit",
"BeforePageDisplay": "EnhanceMarkupHooks::onBeforePageDisplay"
},
"manifest_version": 2
}
includes/EnhanceMarkupHooks.php:
<?php
class EnhanceMarkupHooks
{
public static function onBeforePageDisplay(OutputPage &$out, Skin &$skin)
{
$out->addModules("ext.EnhanceMarkup.styles");
$out->addModules("ext.EnhanceMarkup.scripts");
return true;
}
public static function onParserFirstCallInit(Parser $parser)
{
// 用解析器注册您的自定义解析器函数
$parser->setHook("random", [self::class, "randomRender"]);
return true;
}
public static function onInternalParseBeforeLinks(Parser &$parser, &$text)
{
// - * 4+ == <hr>
// 用水平线替换3-9个'*'、'-'或'_'的序列
$text = preg_replace('/^([-]{3,9})$/m', "<hr>", $text);
// [pagecount]显示所有页面的数量
// 用页面的总数替换[pagecount]
$text = preg_replace_callback(
"/\[pagecount\]/",
function ($matches) use ($parser) {
$dbr = wfGetDB(DB_REPLICA);
$count = $dbr->selectRowCount("page");
return $count;
},
$text
);
// 用<ref group="$1">$2</ref>替换[*A text]
$text = preg_replace(
"/\[\*\s+([^ ]+)\s+(.*?)\]/",
'<ref group="$1">$2</ref>',
$text
);
// 用<ref group="$1" />替换[*A]
$text = preg_replace(
"/\[\*\s+([^ ]+)\s*\]/",
'<ref group="$1" />',
$text
);
// 用<ref>$1</ref>替换[* text]
$text = preg_replace("/\[\*\s+(.*?)\]/", '<ref>$1</ref>', $text);
// 用{{$1}}替换[include text]
$text = preg_replace("/\[\include\s+(.*?)\]/", '{{$1}}', $text);
// 用<br>替换[br]
$text = str_replace("[br]", "<br>", $text);
// 字体大小增加{{{+1 (content) }}} - 范围:1~5
$text = preg_replace_callback('/\{\{\{\+([1-5])\s*(.*?)\s*\}\}\}/s', function($matches) {
return '<span style="font-size:'.(1 + $matches[1]).'em;">'.$matches[2].'</span>';
}, $text);
// 字体大小减小{{{-1 (content) }}} - 范围:1~5
$text = preg_replace_callback('/\{\{\{-([1-5])\s*(.*?)\s*\}\}\}/s', function($matches) {
return '<span style="font-size:'.(1 - $matches[1]/10).'em;">'.$matches[2].'</span>';
}, $text);
return true;
}
// 随机
// <random range="50">True|False</random>
public static function randomRender(
$input,
array $args,
Parser $parser,
PPFrame $frame
) {
// 禁用缓存
$parser->getOutput()->updateCacheExpiry(0);
// 解析输入
$parts = explode("|", $input);
// 从参数中获取范围
$range = isset($args["range"]) ? $args["range"] : 2; // 默认为2
// 在范围内生成随机数
$randomNumber = mt_rand(1, $range);
// 根据随机数选择输出
if ($randomNumber <= $range / 2) {
// 如果随机数在范围的前一半,返回第一个部分
return $parts[0];
} else {
// 否则,返回第二个部分(如果存在),否则返回第一个部分
return isset($parts[1]) ? $parts[1] : $parts[0];
}
}
}
查看代码,似乎没有特别的问题 - 如果它应该工作,像[* texts]这样在维基中键入应该生成一个名为texts的脚注,但出于某种原因它输出文字。
例如,如果您键入'hello[br]world',您应该在hello下面看到world,但什么也没有。
我的MediaWiki网站地址是https://www.gaonwiki.com
如果您需要更多信息,请告诉我。谢谢。
英文:
The regex feature of the onParserBeforePreprocess function doesn't work in the extension I'm building, and I don't know why.
Let me elaborate on the issue with the onParserBeforePreprocess function not working.
extension.json:
{
"name": "EnhanceMarkup",
"description": "Provides enhanced markup functionalities",
"version": "1.0",
"author": [
"Jeong Gaon"
],
"url": "https://www.gaon.xyz/mw_extensions",
"type": "other",
"license-name": "Apache-2.0",
"AutoloadClasses": {
"EnhanceMarkupHooks": "includes/EnhanceMarkupHooks.php"
},
"ResourceModules": {
"ext.EnhanceMarkup.styles": {
"styles": "resources/ext.EnhanceMarkup.styles.css",
"localBasePath": "",
"remoteExtPath": "EnhanceMarkup"
},
"ext.EnhanceMarkup.scripts": {
"scripts": ["resources/ext.EnhanceMarkup.scripts.js", "resources/lib/math.js"],
"localBasePath": "",
"remoteExtPath": "EnhanceMarkup"
}
},
"Hooks": {
"InternalParseBeforeLinks": "EnhanceMarkupHooks::onInternalParseBeforeLinks",
"ParserFirstCallInit": "EnhanceMarkupHooks::onParserFirstCallInit",
"BeforePageDisplay": "EnhanceMarkupHooks::onBeforePageDisplay"
},
"manifest_version": 2
}
includes/EnhanceMarkupHooks.php:
<?php
class EnhanceMarkupHooks
{
public static function onBeforePageDisplay(OutputPage &$out, Skin &$skin)
{
$out->addModules("ext.EnhanceMarkup.styles");
$out->addModules("ext.EnhanceMarkup.scripts");
return true;
}
public static function onParserFirstCallInit(Parser $parser)
{
// Register each of your custom parser functions with the parser
$parser->setHook("random", [self::class, "randomRender"]);
return true;
}
public static function onInternalParseBeforeLinks(Parser &$parser, &$text)
{
// - * 4+ == <hr>
// Replace sequences of 3-9 '*', '-', or '_' with a horizontal rule
$text = preg_replace('/^([-]{3,9})$/m', "<hr>", $text);
// [pagecount] show all count of page
// Replace [pagecount] with the total number of pages
$text = preg_replace_callback(
"/\[pagecount\]/",
function ($matches) use ($parser) {
$dbr = wfGetDB(DB_REPLICA);
$count = $dbr->selectRowCount("page");
return $count;
},
$text
);
// Replace [*A text] with <ref group="A">text</ref>
$text = preg_replace(
"/\[\*\s+([^ ]+)\s+(.*?)\]/",
'<ref group="$1">$2</ref>',
$text
);
// Replace [*A] with <ref group="A" />
$text = preg_replace(
"/\[\*\s+([^ ]+)\s*\]/",
'<ref group="$1" />',
$text
);
// Replace [* text] with <ref>text</ref>
$text = preg_replace("/\[\*\s+(.*?)\]/", '<ref>$1</ref>', $text);
// Replace [include text] with {{text}}
$text = preg_replace("/\[\include\s+(.*?)\]/", '{{$1}}', $text);
// Replace [br] with <br>
$text = str_replace("[br]", "<br>", $text);
// Font Size up {{{+1 (content) }}} - Range: 1~5
$text = preg_replace_callback('/\{\{\{\+([1-5])\s*(.*?)\s*\}\}\}/s', function($matches) {
return '<span style="font-size:'.(1 + $matches[1]).'em;">'.$matches[2].'</span>';
}, $text);
// Font Size down {{{-1 (content) }}} - Range: 1~5
$text = preg_replace_callback('/\{\{\{-([1-5])\s*(.*?)\s*\}\}\}/s', function($matches) {
return '<span style="font-size:'.(1 - $matches[1]/10).'em;">'.$matches[2].'</span>';
}, $text);
return true;
}
// Random
// <random range="50">True|False</random>
public static function randomRender(
$input,
array $args,
Parser $parser,
PPFrame $frame
) {
// Disable caching
$parser->getOutput()->updateCacheExpiry(0);
// Parse the input
$parts = explode("|", $input);
// Get the range from args
$range = isset($args["range"]) ? $args["range"] : 2; // default to 2
// Generate a random number within the range
$randomNumber = mt_rand(1, $range);
// Choose the output based on the random number
if ($randomNumber <= $range / 2) {
// If the random number is in the first half of the range, return the first part
return $parts[0];
} else {
// Otherwise, return the second part if it exists, or the first part if it doesn't
return isset($parts[1]) ? $parts[1] : $parts[0];
}
}
}
Looking at the code, there doesn't seem to be anything particularly wrong with it - if it's supposed to work, typing something like [* texts] within the wiki should generate a footnote called texts, but for some reason it's outputting literally.
For example, if you type 'hello[br]world', you should see world under hello, but nothing.
My MediaWiki site address is https://www.gaonwiki.com
Let me know if you need any more information. I'll provide it. Thank you.
答案1
得分: 1
A) 为了匹配你所描述的 [*A Text]
引用,我会将模式更正如下:
/\[\*(?<group>\w+)\s+(?<text>[^\]]+)\]/
想法是使用命名捕获组,格式为 (?<group_name>...pattern...)
,并且在 \w+
中更加精确地匹配单词字符,然后使用 \s+
匹配一个或多个空格,最后使用 [^\]]+
匹配除了闭括号之外的任何字符。
替换变成了 <ref group="$group">$text</ref>
这里有一些测试链接:https://regex101.com/r/vueNcM/2
B) 步骤2,为了仅匹配 [*A]
,我会使用 /\[\*(?<group>\w+)\]/
,然后替换为 <ref group="$group" />
这里也有测试链接:https://regex101.com/r/gYFOzO/2
C) 步骤3,将 [* text]
替换为 <ref>text</ref>
,我首先会使用 /\[\*\s+(?<text>[^\]]+)\]/
,然后替换为 <ref>$text</ref>
。
这里有测试链接:https://regex101.com/r/aYTOH9/1
但是,如果你想允许在文本中使用转义括号(以防用户需要在文本中包含一些括号),那么可以使用/\[\*\s+(?<text>(?:\\\]|[^\]])+)\]/
。
测试链接:https://regex101.com/r/aYTOH9/2
对于这种情况,你将需要使用 preg_replace_callback() 而不是简单的 preg_replace(),因为我们需要取消转义括号:
$text = preg_replace_callback(
'/\[\*\s+(?<text>(?:\\\\\]|[^\]])+)\]/',
function ($matches) {
return '<ref>' .
preg_replace('/\\\\([\[\]])/', '$1', $matches['text']) .
'</ref>';
},
$text
);
在这里测试 PHP 代码:https://onlinephp.io/c/2b5249
创建过滤器时的安全问题
如果用户输入以下内容会发生什么?
Shit happens with [* <script>alert('I got you')</script>]
是否需要另一个过滤器来防止跨站脚本攻击(XSS攻击)?
如果没有安全地转义,那么将所有的 preg_replace() 调用替换为 preg_replace_callback(),就像示例C)中那样,并在捕获的值上执行净化操作:
// 将 [* Some text] 替换为 <ref>Some text</ref>
// 同样处理文本中的转义括号,例如 [* An \[important\] reference]
$text = preg_replace_callback(
'/\[\*\s+(?<text>(?:\\\\\]|[^\]])+)\]/',
function ($matches) {
// 1) 取消转义 "\[" 和 "\]" 为 "[" 和相应的 "]"。
// 2) 由于我们正在创建HTML,文本应该被转义,因为它可能包含一些内容,例如 <strong>Bold</strong> 或更糟糕的一些JavaScript,例如 <script>alert('XSS攻击')</script>。
return '<ref>' .
htmlspecialchars(
preg_replace('/\\\\([\[\]])/', '$1', $matches['text'])
) .
'</ref>';
},
$text
);
在这里测试 PHP 代码:https://onlinephp.io/c/8a7f8
英文:
A) To match your references described by [*A Text]
, I would correct
the pattern like this:
/\[\*(?<group>\w+)\s+(?<text>[^\]]+)\]/
The idea is to use named capturing groups with
(?<group_name>...pattern...)
and also to be a bit more precise with \w+
to match word characters,
then \s+
for one or several spaces and then any char which isn't the
closing bracket with [^\]]+
.
The replacement becomes <ref group="$group">$text</ref>
Here are some tests of it: https://regex101.com/r/vueNcM/2
B) Step 2, to match only [*A]
, I would use
/\[\*(?<group>\w+)\]/
and replace it with <ref group="$group" />
Here are the tests too: https://regex101.com/r/gYFOzO/2
C) Step 3, to replace [* text]
with <ref>text</ref>
, I would
use first use /\[\*\s+(?<text>[^\]]+)\]/
and replace it by
<ref>$text</ref>
.
Tests available here: https://regex101.com/r/aYTOH9/1
But if you want to allow escaped bracket in the text (in case
the user needs to have some brackets in the text, then use
/\[\*\s+(?<text>(?:\\\]|[^\]])+)\]/
Tests: https://regex101.com/r/aYTOH9/2
For this situation, you'll have to do a preg_replace_callback()
instead of a simple preg_replace() because we have to unescape
the brackets:
$text = preg_replace_callback(
'/\[\*\s+(?<text>(?:\\\\\]|[^\]])+)\]/',
function ($matches) {
return '<ref>' .
preg_replace('/\\\\([\[\]])/', '$1', $matches['text']) .
'</ref>';
},
$text
);
Test the PHP here: https://onlinephp.io/c/2b5249
Security concerns when creating filters
What happens if the user inputs this?
Shit happens with [* <script>alert('I got you')</script>]
Will there be another filter to avoid XSS attacks?
If it's not safely escaped, then replace all your preg_replace()
calls by a preg_replace_callback() like in example C) above and
do the sanitizing operations on the captured values:
// Replace [* Some text] by <ref>Some text</ref>
// Also handle escaped brackets in text, such as [* An \[important\] reference]
$text = preg_replace_callback(
// In the pattern, \ should be doubled, but only for known PHP escaped
// sequences, such as \t, \n, \a, or \\. This makes the pattern below not
// very readable :-( In JavaSript it would be simple like this:
// /\[\*\s+(?<text>(?:\\\]|[^\]])+)\]/
'/\[\*\s+(?<text>(?:\\\\\]|[^\]])+)\]/',
function ($matches) {
// 1) Unescape "\[" or "\]" by "[" and respectively "]".
// 2) As we are creating HTML, the text should be sanitized as it may
// contain some stuff like <strong>Bold</strong> or worse some JavaScript
// <script>alert('XSS attack')</script>.
return '<ref>' .
htmlspecialchars(
preg_replace('/\\\\([\[\]])/', '$1', $matches['text'])
) .
'</ref>';
},
$text
);
PHP code in action here: https://onlinephp.io/c/8a7f8
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论