2022-02-03 15:26:44 +00:00
|
|
|
namespace ASC.Common.Utils;
|
|
|
|
|
|
|
|
/// <summary>
/// Helpers for reducing HTML to plain text and for highlighting search terms inside HTML.
/// </summary>
public static class HtmlUtil
{
    // Any single tag: "<" up to the next ">".
    private static readonly Regex _tagReplacer
        = new Regex("<[^>]*>", RegexOptions.Multiline | RegexOptions.Compiled);

    // An HTML comment, possibly spanning several lines.
    private static readonly Regex _commentsReplacer
        = new Regex("<!--(?s).*?-->", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // A <script>/<style> element together with its content. The closing tag may carry
    // whitespace before ">" (e.g. "</script >"), which is valid HTML.
    private static readonly Regex _xssReplacer
        = new Regex(@"<\s*(style|script)[^>]*>(.*?)<\s*/\s*(style|script)\s*>", RegexOptions.IgnoreCase
            | RegexOptions.CultureInvariant | RegexOptions.Compiled | RegexOptions.Singleline);

    // A run of non-whitespace characters, i.e. one search word.
    private static readonly Regex _worder =
        new Regex(@"\S+", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Converts HTML to plain text: removes &lt;script&gt;/&lt;style&gt; elements, comments and tags,
    /// trims the result, optionally shortens it and finally HTML-decodes it.
    /// </summary>
    /// <param name="html">Source markup; <c>null</c> or empty yields an empty string.</param>
    /// <param name="maxLength">
    /// Maximum length of the returned text including <paramref name="endBlockTemplate"/>,
    /// measured before HTML decoding. Zero or a negative value means "no limit".
    /// </param>
    /// <param name="endBlockTemplate">Suffix appended when the text was shortened; <c>null</c> is treated as empty.</param>
    /// <returns>The plain text, never <c>null</c>.</returns>
    public static string GetText(string html, int maxLength = 0, string endBlockTemplate = "...")
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        // Drop <script>/<style> elements first so their content never reaches the text.
        html = _xssReplacer.Replace(html, string.Empty);
        if (html.Length == 0)
        {
            return html;
        }

        // Comments have to go before generic tag stripping: the tag pattern would otherwise
        // swallow "<!-- ... >" up to the first ">" and leave the rest of the comment behind.
        var text = _commentsReplacer.Replace(html, string.Empty);
        text = _tagReplacer.Replace(text, string.Empty).Trim();

        if (text.Length == 0)
        {
            return string.Empty;
        }

        // Text that fits exactly is returned untouched; so is everything when no limit is set.
        if (maxLength <= 0 || text.Length <= maxLength)
        {
            return HttpUtility.HtmlDecode(text);
        }

        // Reserve room for the suffix. From here on: 0 <= limit <= maxLength < text.Length.
        var suffix = endBlockTemplate ?? string.Empty;
        var limit = Math.Max(0, maxLength - suffix.Length);

        // Prefer to cut at the last space at or before the limit (indexes limit..1);
        // fall back to a hard cut when the leading part contains no space.
        var cut = text.LastIndexOf(' ', limit, limit);
        text = cut > 0 ? text.Substring(0, cut) : text.Substring(0, limit);

        return HttpUtility.HtmlDecode(text + suffix);
    }

    /// <summary>
    /// Converts HTML to plain text without any length limit.
    /// </summary>
    /// <param name="html">Source markup.</param>
    /// <returns>The plain text, never <c>null</c>.</returns>
    public static string ToPlainText(string html) => GetText(html);

    /// <summary>
    /// Wraps every occurrence of the words from <paramref name="searchText"/> found in
    /// <paramref name="htmlText"/> into <c>&lt;span class='searchTextHighlight'&gt;</c>.
    /// Matching is case-insensitive and is applied to the raw markup, so a word that occurs
    /// inside a tag name or attribute is wrapped as well.
    /// </summary>
    /// <param name="searchText">Whitespace separated list of words to highlight.</param>
    /// <param name="htmlText">HTML in which the words are highlighted.</param>
    /// <param name="withoutLink">When <c>true</c>, the CSS class <c>bold</c> is added to the span.</param>
    /// <returns>
    /// The highlighted HTML; <paramref name="htmlText"/> unchanged when either argument is
    /// null/empty or <paramref name="searchText"/> contains no words.
    /// </returns>
    public static string SearchTextHighlight(string searchText, string htmlText, bool withoutLink = false)
    {
        if (string.IsNullOrEmpty(searchText) || string.IsNullOrEmpty(htmlText))
        {
            return htmlText;
        }

        // Escape each word on its own: escaping the joined pattern would also escape the "|"
        // separators and turn the alternation into one literal string.
        // Longer words go first so that "foobar" is not pre-empted by "foo".
        var alternatives = _worder.Matches(searchText)
            .Select(m => m.Value)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(w => w.Length)
            .Select(Regex.Escape)
            .ToArray();

        // A whitespace-only search text contains no words; there is nothing to highlight.
        if (alternatives.Length == 0)
        {
            return htmlText;
        }

        var openingTag = "<span class='searchTextHighlight" + (withoutLink ? " bold" : string.Empty) + "'>";

        // The pattern differs per call, so it is not compiled; the static Replace uses the regex cache.
        return Regex.Replace(
            htmlText,
            string.Join("|", alternatives),
            m => openingTag + m.Value + "</span>",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }
}
|