DocSpace-client/common/ASC.Common/Utils/HtmlUtil.cs

117 lines
5.2 KiB
C#
Raw Normal View History

2022-03-15 18:00:53 +00:00
// (c) Copyright Ascensio System SIA 2010-2022
//
// This program is a free software product.
// You can redistribute it and/or modify it under the terms
// of the GNU Affero General Public License (AGPL) version 3 as published by the Free Software
// Foundation. In accordance with Section 7(a) of the GNU AGPL its Section 15 shall be amended
// to the effect that Ascensio System SIA expressly excludes the warranty of non-infringement of
// any third-party rights.
//
// This program is distributed WITHOUT ANY WARRANTY, without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For details, see
// the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
//
// You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia, EU, LV-1021.
//
// The interactive user interfaces in modified source and object code versions of the Program must
// display Appropriate Legal Notices, as required under Section 5 of the GNU AGPL version 3.
//
// Pursuant to Section 7(b) of the License you must retain the original Product logo when
// distributing the program. Pursuant to Section 7(e) we decline to grant you any rights under
// trademark law for use of our trademarks.
//
// All the Product's GUI elements, including illustrations and icon sets, as well as technical writing
// content are licensed under the terms of the Creative Commons Attribution-ShareAlike 4.0
// International. See the License terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
namespace ASC.Common.Utils;
/// <summary>
/// Helpers for converting HTML to plain text and for highlighting search words inside HTML.
/// </summary>
public static class HtmlUtil
{
    // Matches any single tag-like span: '<' up to the next '>'.
    private static readonly Regex _tagReplacer
        = new Regex("<[^>]*>", RegexOptions.Multiline | RegexOptions.Compiled);

    // Matches an HTML comment, including comments that span several lines.
    private static readonly Regex _commentsReplacer
        = new Regex("<!--(?s).*?-->", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Matches a <script> or <style> element together with its content.
    private static readonly Regex _xssReplacer
        = new Regex(@"<\s*(style|script)[^>]*>(.*?)<\s*/\s*(style|script)>", RegexOptions.IgnoreCase
            | RegexOptions.CultureInvariant | RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches one whitespace-delimited word.
    private static readonly Regex _worder =
        new Regex(@"\S+", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Extracts the plain text from an HTML fragment, optionally truncating it.
    /// </summary>
    /// <param name="html">Source HTML; null or empty yields an empty string.</param>
    /// <param name="maxLength">
    /// Maximum length of the result including <paramref name="endBlockTemplate"/>;
    /// 0 disables truncation.
    /// </param>
    /// <param name="endBlockTemplate">
    /// Suffix appended when the text is truncated; null is treated as an empty suffix.
    /// </param>
    /// <returns>The HTML-decoded text without script/style elements, comments and tags.</returns>
    public static string GetText(string html, int maxLength = 0, string endBlockTemplate = "...")
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        // Drop <script>/<style> elements together with their content.
        html = _xssReplacer.Replace(html, string.Empty);
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        // Comments must go BEFORE generic tag stripping: once every "<...>" span is removed,
        // no "<!-- ... -->" sequence can remain, so a comment containing '>' would otherwise
        // leak its tail into the resulting text.
        var text = _commentsReplacer.Replace(html, string.Empty);
        text = _tagReplacer.Replace(text, string.Empty).Trim();
        if (text.Length == 0)
        {
            return string.Empty;
        }

        if (maxLength == 0 || text.Length < maxLength)
        {
            return HttpUtility.HtmlDecode(text);
        }

        // A null suffix is treated as "no suffix" instead of failing on .Length.
        var suffix = endBlockTemplate ?? string.Empty;

        // Reserve room for the suffix, then cut at the last space inside the remaining
        // budget so that words are not split; fall back to a hard cut when no space is found.
        var limit = Math.Max(0, maxLength - suffix.Length);
        var startIndex = Math.Max(0, Math.Min(text.Length - 1, limit));
        var countToScan = Math.Max(0, startIndex - 1);
        var lastSpaceIndex = text.LastIndexOf(' ', startIndex, countToScan);

        text = lastSpaceIndex > 0
            ? text.Remove(lastSpaceIndex)
            : text.Substring(0, limit);

        return HttpUtility.HtmlDecode(text + suffix);
    }

    /// <summary>
    /// Converts an HTML fragment to plain text without truncation.
    /// </summary>
    /// <param name="html">Source HTML.</param>
    /// <returns>The plain text, see <see cref="GetText"/>.</returns>
    public static string ToPlainText(string html)
    {
        return GetText(html);
    }

    /// <summary>
    /// Highlights every occurrence of the words of <paramref name="searchText"/> in
    /// <paramref name="htmlText"/> by wrapping it into a <c>span</c> with the
    /// <c>searchTextHighlight</c> CSS class. Matching is case-insensitive.
    /// </summary>
    /// <param name="searchText">Whitespace-separated list of words to highlight.</param>
    /// <param name="htmlText">HTML in which the words are highlighted.</param>
    /// <param name="withoutLink">When true the <c>bold</c> CSS class is added to the span.</param>
    /// <returns>
    /// The highlighted HTML, or <paramref name="htmlText"/> unchanged when either argument is
    /// null/empty or <paramref name="searchText"/> contains no words.
    /// </returns>
    public static string SearchTextHighlight(string searchText, string htmlText, bool withoutLink = false)
    {
        if (string.IsNullOrEmpty(searchText) || string.IsNullOrEmpty(htmlText))
        {
            return htmlText;
        }

        // Escape each word on its own: escaping the joined pattern would turn the '|'
        // separators into literals and break the alternation. Longer words go first so
        // that a short word cannot pre-empt a longer one it is a prefix of.
        var escapedWords = _worder.Matches(searchText)
            .Select(m => m.Value)
            .Distinct()
            .OrderByDescending(w => w.Length)
            .Select(Regex.Escape)
            .ToArray();

        // Whitespace-only search text yields no words; there is nothing to highlight.
        if (escapedWords.Length == 0)
        {
            return htmlText;
        }

        // Built per call, so RegexOptions.Compiled would only add compilation overhead.
        var wordsFinder = new Regex(string.Join("|", escapedWords), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        var cssClass = withoutLink ? "searchTextHighlight bold" : "searchTextHighlight";

        return wordsFinder.Replace(htmlText, m => $"<span class='{cssClass}'>{m.Value}</span>");
    }
}