DocSpace-client/common/services/ASC.ElasticSearch/Engine/BaseIndexer.cs

648 lines
24 KiB
C#
Raw Normal View History

2020-01-24 13:07:51 +00:00
/*
*
* (c) Copyright Ascensio System Limited 2010-2018
*
* This program is freeware. You can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) version 3 as published by the Free Software Foundation (https://www.gnu.org/copyleft/gpl.html).
* In accordance with Section 7(a) of the GNU GPL its Section 15 shall be amended to the effect that
* Ascensio System SIA expressly excludes the warranty of non-infringement of any third-party rights.
*
* THIS PROGRAM IS DISTRIBUTED WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR
* FITNESS FOR A PARTICULAR PURPOSE. For more details, see GNU GPL at https://www.gnu.org/copyleft/gpl.html
*
* You can contact Ascensio System SIA by email at sales@onlyoffice.com
*
* The interactive user interfaces in modified source and object code versions of ONLYOFFICE must display
* Appropriate Legal Notices, as required under Section 5 of the GNU GPL version 3.
*
* Pursuant to Section 7 § 3(b) of the GNU GPL you must retain the original ONLYOFFICE logo which contains
* relevant author attributions when distributing the software. If the display of the logo in its graphic
* form is not reasonably feasible for technical reasons, you must include the words "Powered by ONLYOFFICE"
* in every copy of the program you distribute.
* Pursuant to Section 7 § 3(e) we decline to grant you any rights under trademark law for use of our trademarks.
*
*/
using System;
using System.Collections;
2020-02-17 08:58:14 +00:00
using System.Collections.Concurrent;
2020-01-24 13:07:51 +00:00
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using System.Text;
2020-04-23 09:38:50 +00:00
using System.Threading.Tasks;
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
using ASC.Common;
2020-01-24 13:07:51 +00:00
using ASC.Common.Caching;
using ASC.Common.Logging;
2020-02-17 08:58:14 +00:00
using ASC.Core;
using ASC.Core.Common.EF;
using ASC.Core.Common.EF.Context;
2020-04-27 16:59:52 +00:00
using ASC.Core.Common.EF.Model;
2020-01-24 13:07:51 +00:00
using ASC.ElasticSearch.Core;
using ASC.ElasticSearch.Service;
2020-02-17 08:58:14 +00:00
using Autofac;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
2020-01-24 13:07:51 +00:00
using Nest;
namespace ASC.ElasticSearch
2020-02-17 08:58:14 +00:00
{
/// <summary>
/// Holds a per-index-name "exists" flag and resets that flag to <c>false</c> whenever a
/// <see cref="SearchItem"/> notification is received through the injected notifier.
/// </summary>
public class BaseIndexerHelper
{
    private readonly ICacheNotify<SearchItem> Notify;

    /// <summary>Index name mapped to the last recorded existence flag.</summary>
    public ConcurrentDictionary<string, bool> IsExist { get; set; }

    /// <summary>Creates the flag map and subscribes to notifications of any action.</summary>
    /// <param name="cacheNotify">Notifier used both to receive and to publish reset messages.</param>
    public BaseIndexerHelper(ICacheNotify<SearchItem> cacheNotify)
    {
        IsExist = new ConcurrentDictionary<string, bool>();
        Notify = cacheNotify;

        // Every incoming notification marks the named index as "not known to exist".
        Notify.Subscribe(item => IsExist[item.Id] = false, CacheNotifyAction.Any);
    }

    /// <summary>Publishes a notification carrying the index name of <paramref name="t"/>.</summary>
    /// <param name="t">Search item whose <c>IndexName</c> is sent as the message id.</param>
    public void Clear<T>(T t) where T : class, ISearchItem
    {
        var message = new SearchItem { Id = t.IndexName };
        Notify.Publish(message, CacheNotifyAction.Any);
    }
}
2020-04-27 16:59:52 +00:00
public class BaseIndexer<T> where T : class, ISearchItem
2020-01-24 13:07:51 +00:00
{
// Lock object used by the existence check and by index creation.
// Being a static field of a generic class, there is one instance per closed type.
private static readonly object Locker = new object();

// Lower bound for the id-range step used by IndexAll.
private const int QueryLimit = 1000;

/// <summary>Resolves a <typeparamref name="T"/> from <see cref="ServiceProvider"/> on every access.</summary>
protected internal T Wrapper => ServiceProvider.GetService<T>();

/// <summary>Index name reported by <see cref="Wrapper"/>.</summary>
public string IndexName => Wrapper.IndexName;

/// <summary>Set to <c>true</c> by <c>CreateIfNotExist</c> after it issues the create-index call.</summary>
public bool IsExist { get; set; }

public Client Client { get; }
public ILog Log { get; }
public TenantManager TenantManager { get; }
public SearchSettingsHelper SearchSettingsHelper { get; }
public BaseIndexerHelper BaseIndexerHelper { get; }
public Settings Settings { get; }
public IServiceProvider ServiceProvider { get; }
public WebstudioDbContext WebstudioDbContext { get; }
2020-02-17 08:58:14 +00:00
/// <summary>Stores the injected collaborators; no other work is done here.</summary>
/// <param name="client">Wrapper exposing the Elasticsearch client through <c>Instance</c>.</param>
/// <param name="log">Options monitor whose <c>CurrentValue</c> becomes <see cref="Log"/>.</param>
/// <param name="dbContextManager">Manager whose <c>Value</c> becomes <see cref="WebstudioDbContext"/>.</param>
/// <param name="tenantManager">Used to read the current tenant id when selecting.</param>
/// <param name="searchSettingsHelper">Stored as-is; not used inside this class.</param>
/// <param name="baseIndexerHelper">Shared index-existence cache.</param>
/// <param name="settings">Provides <c>MemoryLimit</c> for bulk batching.</param>
/// <param name="serviceProvider">Used to resolve <typeparamref name="T"/> and selectors.</param>
public BaseIndexer(
    Client client,
    IOptionsMonitor<ILog> log,
    DbContextManager<WebstudioDbContext> dbContextManager,
    TenantManager tenantManager,
    SearchSettingsHelper searchSettingsHelper,
    BaseIndexerHelper baseIndexerHelper,
    Settings settings,
    IServiceProvider serviceProvider)
{
    // Plain references.
    Client = client;
    Log = log.CurrentValue;
    TenantManager = tenantManager;
    SearchSettingsHelper = searchSettingsHelper;
    BaseIndexerHelper = baseIndexerHelper;
    Settings = settings;
    ServiceProvider = serviceProvider;

    // Taken from the manager last, same as before.
    WebstudioDbContext = dbContextManager.Value;
}
/// <summary>Indexes a single item with the metadata built by <c>GetMeta</c>.</summary>
/// <param name="data">Item to index.</param>
/// <param name="immediately">Forwarded to <c>GetMeta</c>, where it enables refresh on the request.</param>
internal void Index(T data, bool immediately = true)
    => Client.Instance.Index(data, descriptor => GetMeta(descriptor, data, immediately));
/// <summary>
/// Indexes a list of items with bulk requests.
/// </summary>
/// <remarks>
/// When the first item is not an <see cref="ISearchItemDocument"/> the whole list is sent in one bulk call.
/// Otherwise items are grouped into portions whose summed <c>Document.Data.Length</c> stays below
/// <c>Settings.MemoryLimit</c>; an item that alone reaches the limit is indexed individually
/// (errors are logged, not rethrown). Document payloads of already processed items are set to
/// <c>null</c> so they can be collected.
/// Fix: previously an oversized item in the LAST position skipped the final flush, so the
/// accumulated portion was never sent. The flush now always runs for the last item, and an
/// empty portion is not sent.
/// </remarks>
/// <param name="data">Items to index; an empty list is a no-op.</param>
/// <param name="immediately">Used only for items indexed individually; the bulk calls do not read it.</param>
internal void Index(List<T> data, bool immediately = true)
{
    if (!data.Any()) return;

    if (!(data[0] is ISearchItemDocument))
    {
        Client.Instance.Bulk(r => r.IndexMany(data, GetMeta));
        return;
    }

    var currentLength = 0L;
    var portion = new List<T>();
    var portionStart = 0;

    for (var i = 0; i < data.Count; i++)
    {
        var t = data[i];
        // The last element always triggers a flush of whatever has been collected.
        var runBulk = i == data.Count - 1;

        if (!(t is ISearchItemDocument wwd) || wwd.Document == null || string.IsNullOrEmpty(wwd.Document.Data))
        {
            // No payload: costs nothing against the memory budget.
            portion.Add(t);
        }
        else
        {
            var dLength = wwd.Document.Data.Length;

            if (dLength >= Settings.MemoryLimit)
            {
                // Too large for any portion: send it on its own, best effort.
                try
                {
                    Index(t, immediately);
                }
                catch (Exception e)
                {
                    Log.Error(e);
                }

                wwd.Document.Data = null;
                wwd.Document = null;
                GC.Collect();
                // Intentionally no 'continue': if this was the last item the pending portion must still be flushed.
            }
            else if (currentLength + dLength < Settings.MemoryLimit)
            {
                portion.Add(t);
                currentLength += dLength;
            }
            else
            {
                // Does not fit: flush the current portion and revisit this item in the next iteration.
                runBulk = true;
                i--;
            }
        }

        if (!runBulk) continue;

        if (portion.Count > 0)
        {
            var portion1 = portion;
            Client.Instance.Bulk(r => r.IndexMany(portion1, GetMeta));
        }

        // Release payloads of the items processed since the previous flush.
        for (var j = portionStart; j < i; j++)
        {
            if (data[j] is ISearchItemDocument doc && doc.Document != null)
            {
                doc.Document.Data = null;
                doc.Document = null;
            }
        }

        portionStart = i;
        portion = new List<T>();
        currentLength = 0L;
        GC.Collect();
    }
}
/// <summary>Updates one document, either as a partial document or via a script for the given fields.</summary>
/// <param name="data">Item that identifies the document and supplies the new values.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
/// <param name="fields">Optional field selectors; when empty the whole <paramref name="data"/> is sent as the doc.</param>
internal void Update(T data, bool immediately = true, params Expression<Func<T, object>>[] fields)
    => Client.Instance.Update(DocumentPath<T>.Id(data), descriptor => GetMetaForUpdate(descriptor, data, immediately, fields));
/// <summary>Updates a list-valued field of one document using the given <see cref="UpdateAction"/>.</summary>
/// <param name="data">Item that identifies the document and supplies the list values.</param>
/// <param name="action">How the list values are applied (add, replace, remove).</param>
/// <param name="fields">Selector of the list member to change.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
internal void Update(T data, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
    => Client.Instance.Update(DocumentPath<T>.Id(data), descriptor => GetMetaForUpdate(descriptor, data, action, fields, immediately));
/// <summary>Runs an update-by-query for the documents matched by <paramref name="expression"/> within a tenant.</summary>
/// <param name="data">Item supplying the new field values.</param>
/// <param name="expression">Selector expression describing which documents to update.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="immediately">Forwarded to the selector's update descriptor.</param>
/// <param name="fields">Field selectors whose values are written by the generated script.</param>
internal void Update(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true, params Expression<Func<T, object>>[] fields)
{
    var request = GetDescriptorForUpdate(data, expression, tenantId, immediately, fields);
    Client.Instance.UpdateByQuery(request);
}
/// <summary>Runs an update-by-query that applies an <see cref="UpdateAction"/> to a list-valued field.</summary>
/// <param name="data">Item supplying the list values.</param>
/// <param name="expression">Selector expression describing which documents to update.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="action">How the list values are applied.</param>
/// <param name="fields">Selector of the list member to change.</param>
/// <param name="immediately">Forwarded to the selector's update descriptor.</param>
internal void Update(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
{
    var request = GetDescriptorForUpdate(data, expression, tenantId, action, fields, immediately);
    Client.Instance.UpdateByQuery(request);
}
/// <summary>Deletes the document that corresponds to <paramref name="data"/>.</summary>
/// <param name="data">Item identifying the document.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
internal void Delete(T data, bool immediately = true)
    => Client.Instance.Delete<T>(data, descriptor => GetMetaForDelete(descriptor, immediately));
/// <summary>Runs a delete-by-query for the documents matched by <paramref name="expression"/> within a tenant.</summary>
/// <param name="expression">Selector expression describing which documents to delete.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="immediately">Forwarded to the selector's delete descriptor.</param>
internal void Delete(Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true)
{
    var request = GetDescriptorForDelete(expression, tenantId, immediately);
    Client.Instance.DeleteByQuery(request);
}
/// <summary>Sends a flush request for <see cref="IndexName"/>.</summary>
public void Flush()
{
    var request = new FlushRequest(IndexName);
    Client.Instance.Indices.Flush(request);
}
/// <summary>Sends a refresh request for <see cref="IndexName"/>.</summary>
public void Refresh()
{
    var request = new RefreshRequest(IndexName);
    Client.Instance.Indices.Refresh(request);
}
/// <summary>
/// Tells whether the index named by <paramref name="data"/> exists, using the shared
/// <see cref="BaseIndexerHelper"/> cache and falling back to an Elasticsearch "exists" call.
/// </summary>
/// <remarks>
/// Fixes two defects of the previous version: the cache was updated with the instance
/// property <c>IsExist</c> instead of the value just read from Elasticsearch, and the
/// re-check inside the lock tested a local that was already known to be <c>false</c>.
/// </remarks>
/// <param name="data">Item whose <c>IndexName</c> is checked.</param>
/// <returns><c>true</c> when the index exists; <c>false</c> when it does not or when the check failed (the error is logged).</returns>
internal bool CheckExist(T data)
{
    try
    {
        var isExist = BaseIndexerHelper.IsExist.GetOrAdd(data.IndexName, k => Client.Instance.Indices.Exists(k).Exists);
        if (isExist) return true;

        lock (Locker)
        {
            // Another thread may have refreshed the cache while this one waited for the lock.
            if (BaseIndexerHelper.IsExist.TryGetValue(data.IndexName, out var cached) && cached) return true;

            isExist = Client.Instance.Indices.Exists(data.IndexName).Exists;

            // Only replaces a cached 'false'; the result is irrelevant when the entry changed meanwhile.
            _ = BaseIndexerHelper.IsExist.TryUpdate(data.IndexName, isExist, false);

            if (isExist) return true;
        }
    }
    catch (Exception e)
    {
        Log.Error("CheckExist " + data.IndexName, e);
    }

    return false;
}
2020-04-23 09:38:50 +00:00
/// <summary>
/// Drops the stored index state and the Elasticsearch index by calling <c>Clear</c>.
/// </summary>
/// <remarks>
/// The body contains no <c>await</c>, so the work runs synchronously on the caller's thread and
/// the returned task is already finished (or faulted) when it is handed back.
/// </remarks>
/// <returns>A task representing the clear operation.</returns>
public async Task ReIndex()
{
    // Re-population of the index is not triggered from here.
    Clear();
}
/// <summary>
/// Removes the bookkeeping row for this index from the database, deletes the Elasticsearch
/// index and publishes a cache reset through <see cref="BaseIndexerHelper"/>.
/// </summary>
private void Clear()
{
    var stored = WebstudioDbContext.WebstudioIndex.FirstOrDefault(r => r.IndexName == Wrapper.IndexName);

    if (stored != null)
    {
        WebstudioDbContext.WebstudioIndex.Remove(stored);
    }

    // Saved unconditionally, also when no row was found.
    WebstudioDbContext.SaveChanges();

    Log.DebugFormat("Delete {0}", Wrapper.IndexName);

    Client.Instance.Indices.Delete(Wrapper.IndexName);
    BaseIndexerHelper.Clear(Wrapper);
}
/// <summary>Searches the index with the given selector, restricted to the current tenant.</summary>
/// <param name="expression">Expression that configures the selector.</param>
/// <param name="onlyId">Forwarded to the selector's <c>GetDescriptor</c>.</param>
/// <returns>The documents of the search response.</returns>
internal IReadOnlyCollection<T> Select(Expression<Func<Selector<T>, Selector<T>>> expression, bool onlyId = false)
{
    var configure = expression.Compile();

    var scoped = configure(ServiceProvider.GetService<Selector<T>>())
        .Where(r => r.TenantId, TenantManager.GetCurrentTenant().TenantId);

    var response = Client.Instance.Search(scoped.GetDescriptor(this, onlyId));
    return response.Documents;
}
/// <summary>Searches the index with the given selector, restricted to the current tenant, and reports the total hit count.</summary>
/// <param name="expression">Expression that configures the selector.</param>
/// <param name="onlyId">Forwarded to the selector's <c>GetDescriptor</c>.</param>
/// <param name="total">Receives <c>Total</c> of the search response.</param>
/// <returns>The documents of the search response.</returns>
internal IReadOnlyCollection<T> Select(Expression<Func<Selector<T>, Selector<T>>> expression, bool onlyId, out long total)
{
    var configure = expression.Compile();

    var scoped = configure(ServiceProvider.GetService<Selector<T>>())
        .Where(r => r.TenantId, TenantManager.GetCurrentTenant().TenantId);

    var response = Client.Instance.Search(scoped.GetDescriptor(this, onlyId));

    total = response.Total;
    return response.Documents;
}
/// <summary>
/// Creates the index named by <paramref name="data"/> with auto-mapping and the custom analyzers
/// when <c>CheckExist</c> reports that it does not exist yet.
/// </summary>
/// <remarks>
/// Changes compared with the previous version: the existence check is repeated after the lock is
/// taken (another thread may have created the index meanwhile), and the create-index response is
/// inspected - <see cref="IsExist"/> is set only when the response is valid, otherwise the
/// response's debug information is logged. Exceptions are logged and not rethrown.
/// </remarks>
/// <param name="data">Item supplying the index name; an <see cref="ISearchItemDocument"/> also gets the "document" analyzer.</param>
public void CreateIfNotExist(T data)
{
    try
    {
        if (CheckExist(data)) return;

        lock (Locker)
        {
            // Re-check under the lock so concurrent callers do not issue a second create.
            if (CheckExist(data)) return;

            IPromise<IAnalyzers> analyzers(AnalyzersDescriptor b)
            {
                // One "<tokenizer>custom" analyzer per Analyzer enum name.
                foreach (var c in Enum.GetNames(typeof(Analyzer)))
                {
                    var c1 = c;
                    b.Custom(c1 + "custom", ca => ca.Tokenizer(c1).Filters(Filter.lowercase.ToString()).CharFilters(CharFilter.io.ToString()));
                }

                // One "<charfilter>custom" analyzer per CharFilter enum name except "io" (always combined with "io").
                foreach (var c in Enum.GetNames(typeof(CharFilter)))
                {
                    if (c == CharFilter.io.ToString()) continue;

                    var charFilters = new List<string>() { CharFilter.io.ToString(), c };
                    var c1 = c;
                    b.Custom(c1 + "custom", ca => ca.Tokenizer(Analyzer.whitespace.ToString()).Filters(Filter.lowercase.ToString()).CharFilters(charFilters));
                }

                if (data is ISearchItemDocument)
                {
                    b.Custom("document", ca => ca.Tokenizer(Analyzer.whitespace.ToString()).Filters(Filter.lowercase.ToString()).CharFilters(CharFilter.io.ToString()));
                }

                return b;
            }

            var createIndexResponse = Client.Instance.Indices.Create(data.IndexName,
               c =>
               c.Map<T>(m => m.AutoMap())
               .Settings(r => r.Analysis(a =>
                               a.Analyzers(analyzers)
                               .CharFilters(d => d.HtmlStrip(CharFilter.html.ToString())
                               .Mapping(CharFilter.io.ToString(), m => m.Mappings("ё => е", "Ё => Е"))))));

            if (!createIndexResponse.IsValid)
            {
                Log.Error("CreateIfNotExist " + data.IndexName + ": " + createIndexResponse.DebugInformation);
                return;
            }

            IsExist = true;
        }
    }
    catch (Exception e)
    {
        Log.Error("CreateIfNotExist", e);
    }
}
2020-01-24 13:07:51 +00:00
/// <summary>Configures a single-document index request: target index, id, optional refresh and pipeline.</summary>
/// <param name="request">Descriptor to configure.</param>
/// <param name="data">Item supplying index name and id.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
/// <returns>The configured descriptor.</returns>
private IIndexRequest<T> GetMeta(IndexDescriptor<T> request, T data, bool immediately = true)
{
    var descriptor = request.Index(data.IndexName).Id(data.Id);

    if (immediately)
    {
        descriptor.Refresh(Elasticsearch.Net.Refresh.True);
    }

    // Items carrying a document are routed through the "attachments" pipeline.
    if (data is ISearchItemDocument)
    {
        descriptor.Pipeline("attachments");
    }

    return descriptor;
}
/// <summary>Configures one operation of a bulk index request: target index, id and optional pipeline.</summary>
/// <param name="desc">Bulk operation descriptor to configure.</param>
/// <param name="data">Item supplying the id.</param>
/// <returns>The configured operation.</returns>
private IBulkIndexOperation<T> GetMeta(BulkIndexDescriptor<T> desc, T data)
{
    // Note: the target index comes from this indexer's IndexName, not from the item.
    var operation = desc.Index(IndexName).Id(data.Id);

    if (data is ISearchItemDocument)
    {
        operation.Pipeline("attachments");
    }

    return operation;
}
/// <summary>Configures an update request either with a field-update script or with the whole item as partial document.</summary>
/// <param name="request">Descriptor to configure.</param>
/// <param name="data">Item supplying the new values.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
/// <param name="fields">Field selectors; when none are given <paramref name="data"/> is sent as the doc.</param>
/// <returns>The configured descriptor.</returns>
private IUpdateRequest<T, T> GetMetaForUpdate(UpdateDescriptor<T, T> request, T data, bool immediately = true, params Expression<Func<T, object>>[] fields)
{
    var descriptor = request.Index(IndexName);

    if (!fields.Any())
    {
        descriptor.Doc(data);
    }
    else
    {
        descriptor.Script(GetScriptUpdateByQuery(data, fields));
    }

    if (immediately)
    {
        descriptor.Refresh(Elasticsearch.Net.Refresh.True);
    }

    return descriptor;
}
/// <summary>
/// Builds a script (source plus parameters) that writes the values of the selected fields of
/// <paramref name="data"/> into <c>ctx._source</c>.
/// </summary>
/// <remarks>
/// Per field: a member typed exactly as <c>List&lt;&gt;</c> is handled as an "add" list update,
/// a <c>null</c> value removes the field, any other value is assigned through a script parameter.
/// </remarks>
/// <param name="data">Item the field values are read from.</param>
/// <param name="fields">Member-access expressions naming the fields to update.</param>
/// <returns>A function that applies the script source and parameters to a script descriptor.</returns>
private Func<ScriptDescriptor, IScript> GetScriptUpdateByQuery(T data, params Expression<Func<T, object>>[] fields)
{
    var script = new StringBuilder();
    var scriptParams = new Dictionary<string, object>();

    foreach (var field in fields)
    {
        var newValue = field.Compile()(data);

        var node = field.Body;
        var isList = node.Type.IsGenericType && node.Type.GetGenericTypeDefinition() == typeof(List<>);

        // Walk the member chain outwards-in and build the dotted path (".a.b") in camelCase.
        var path = "";
        string name;
        while (!string.IsNullOrEmpty(name = TryGetName(node, out var member)))
        {
            path = "." + name + path;
            node = member.Expression;
        }

        if (isList)
        {
            UpdateByAction(UpdateAction.Add, (IList)newValue, path, scriptParams, script);
            continue;
        }

        if (newValue == null)
        {
            script.AppendFormat("ctx._source.remove('{0}');", path.Substring(1));
            continue;
        }

        var pkey = "p" + path.Replace(".", "");
        script.AppendFormat("ctx._source{0} = params.{1};", path, pkey);
        scriptParams.Add(pkey, newValue);
    }

    var sourceData = script.ToString();

    return r => r.Source(sourceData).Params(scriptParams);
}
/// <summary>Configures an update request with a list-update script for the given action.</summary>
/// <param name="request">Descriptor to configure.</param>
/// <param name="data">Item supplying the list values.</param>
/// <param name="action">How the list values are applied.</param>
/// <param name="fields">Selector of the list member to change.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
/// <returns>The configured descriptor.</returns>
private IUpdateRequest<T, T> GetMetaForUpdate(UpdateDescriptor<T, T> request, T data, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
{
    var script = GetScriptForUpdate(data, action, fields);
    var descriptor = request.Index(IndexName).Script(script);

    if (immediately)
    {
        descriptor.Refresh(Elasticsearch.Net.Refresh.True);
    }

    return descriptor;
}
/// <summary>Builds a script that applies <paramref name="action"/> to the list member selected by <paramref name="fields"/>.</summary>
/// <param name="data">Item the list value is read from.</param>
/// <param name="action">How the list values are applied.</param>
/// <param name="fields">Member-access expression naming the list field.</param>
/// <returns>A function that applies the script source and parameters to a script descriptor.</returns>
private Func<ScriptDescriptor, IScript> GetScriptForUpdate(T data, UpdateAction action, Expression<Func<T, IList>> fields)
{
    var script = new StringBuilder();
    var scriptParams = new Dictionary<string, object>();

    var newValue = fields.Compile()(data);

    // Walk the member chain and build the dotted path (".a.b") in camelCase.
    var path = "";
    var node = fields.Body;
    string name;
    while (!string.IsNullOrEmpty(name = TryGetName(node, out var member)))
    {
        path = "." + name + path;
        node = member.Expression;
    }

    UpdateByAction(action, newValue, path, scriptParams, script);

    // The builder is read when the returned function is invoked, as before.
    return r => r.Source(script.ToString()).Params(scriptParams);
}
/// <summary>
/// Appends script statements and parameters that apply a list update to the field at <paramref name="key"/>.
/// </summary>
/// <remarks>
/// Add: one parameter per value, each added only when not already contained.
/// Replace: the whole list is passed as one parameter and assigned.
/// Remove: one parameter per value, each removing the elements whose <c>id</c> equals the parameter's <c>id</c>.
/// Fix: Remove statements are now terminated with ';' - without it, removing two or more values
/// produced statements glued together into an invalid script.
/// </remarks>
/// <param name="action">Kind of list update.</param>
/// <param name="newValue">Values to add, assign or remove.</param>
/// <param name="key">Dotted path of the field, starting with '.', as appended to <c>ctx._source</c>.</param>
/// <param name="parameters">Receives the script parameters.</param>
/// <param name="source">Receives the script text.</param>
/// <exception cref="ArgumentOutOfRangeException">When <paramref name="action"/> is not a known value.</exception>
private void UpdateByAction(UpdateAction action, IList newValue, string key, Dictionary<string, object> parameters, StringBuilder source)
{
    var paramKey = "p" + key.Replace(".", "");

    switch (action)
    {
        case UpdateAction.Add:
            for (var i = 0; i < newValue.Count; i++)
            {
                parameters.Add(paramKey + i, newValue[i]);
                source.AppendFormat("if (!ctx._source{0}.contains(params.{1})){{ctx._source{0}.add(params.{1})}}", key, paramKey + i);
            }
            break;
        case UpdateAction.Replace:
            parameters.Add(paramKey, newValue);
            source.AppendFormat("ctx._source{0} = params.{1};", key, paramKey);
            break;
        case UpdateAction.Remove:
            for (var i = 0; i < newValue.Count; i++)
            {
                parameters.Add(paramKey + i, newValue[i]);
                // Trailing ';' keeps consecutive statements separated.
                source.AppendFormat("ctx._source{0}.removeIf(item -> item.id == params.{1}.id);", key, paramKey + i);
            }
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(action), action, null);
    }
}
/// <summary>
/// Extracts the member accessed by <paramref name="expr"/>, looking through one unary wrapper
/// (such as a conversion) when the expression itself is not a member access.
/// </summary>
/// <param name="expr">Expression to inspect; may be <c>null</c>.</param>
/// <param name="member">The member-access expression found, or <c>null</c>.</param>
/// <returns>The member name in lower camel case, or an empty string when no member was found.</returns>
private string TryGetName(Expression expr, out MemberExpression member)
{
    member = expr as MemberExpression
             ?? (expr as UnaryExpression)?.Operand as MemberExpression;

    if (member == null)
    {
        return "";
    }

    return member.Member.Name.ToLowerCamelCase();
}
/// <summary>Configures a delete request: target index and optional refresh.</summary>
/// <param name="request">Descriptor to configure.</param>
/// <param name="immediately">When <c>true</c> the request asks for a refresh.</param>
/// <returns>The configured descriptor.</returns>
private IDeleteRequest GetMetaForDelete(DeleteDescriptor<T> request, bool immediately = true)
{
    var descriptor = request.Index(IndexName);

    if (immediately)
    {
        descriptor.Refresh(Elasticsearch.Net.Refresh.True);
    }

    return descriptor;
}
/// <summary>Builds the delete-by-query descriptor for the selector expression, filtered by tenant.</summary>
/// <param name="expression">Expression that configures the selector.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="immediately">Forwarded to the selector's <c>GetDescriptorForDelete</c>.</param>
/// <returns>The descriptor function produced by the selector.</returns>
private Func<DeleteByQueryDescriptor<T>, IDeleteByQueryRequest> GetDescriptorForDelete(Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true)
{
    var configure = expression.Compile();

    var scoped = configure(ServiceProvider.GetService<Selector<T>>())
        .Where(r => r.TenantId, tenantId);

    return scoped.GetDescriptorForDelete(this, immediately);
}
/// <summary>Builds the update-by-query descriptor that writes the selected fields, filtered by tenant.</summary>
/// <param name="data">Item supplying the new field values.</param>
/// <param name="expression">Expression that configures the selector.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="immediately">Forwarded to the selector's <c>GetDescriptorForUpdate</c>.</param>
/// <param name="fields">Field selectors whose values are written by the generated script.</param>
/// <returns>The descriptor function produced by the selector.</returns>
private Func<UpdateByQueryDescriptor<T>, IUpdateByQueryRequest> GetDescriptorForUpdate(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true, params Expression<Func<T, object>>[] fields)
{
    var configure = expression.Compile();

    var scoped = configure(ServiceProvider.GetService<Selector<T>>())
        .Where(r => r.TenantId, tenantId);

    return scoped.GetDescriptorForUpdate(this, GetScriptUpdateByQuery(data, fields), immediately);
}
/// <summary>Builds the update-by-query descriptor that applies a list action, filtered by tenant.</summary>
/// <param name="data">Item supplying the list values.</param>
/// <param name="expression">Expression that configures the selector.</param>
/// <param name="tenantId">Tenant id added to the selector as a filter.</param>
/// <param name="action">How the list values are applied.</param>
/// <param name="fields">Selector of the list member to change.</param>
/// <param name="immediately">Forwarded to the selector's <c>GetDescriptorForUpdate</c>.</param>
/// <returns>The descriptor function produced by the selector.</returns>
private Func<UpdateByQueryDescriptor<T>, IUpdateByQueryRequest> GetDescriptorForUpdate(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
{
    var configure = expression.Compile();

    var scoped = configure(ServiceProvider.GetService<Selector<T>>())
        .Where(r => r.TenantId, tenantId);

    return scoped.GetDescriptorForUpdate(this, GetScriptForUpdate(data, action, fields), immediately);
}
2020-04-27 16:59:52 +00:00
/// <summary>
/// Lazily yields batches of items to index, walking the id range reported by
/// <paramref name="getCount"/> in fixed steps, and records the completion time afterwards.
/// </summary>
/// <remarks>
/// This is an iterator: nothing runs until the result is enumerated, and the
/// "last modified" row is written only once enumeration runs past the final batch.
/// </remarks>
/// <param name="getCount">Given the stored last-indexed time, returns (count, max, min) in that order.</param>
/// <param name="getData">Given range start, step and the last-indexed time, returns one batch.</param>
/// <returns>One list per id-range step.</returns>
public IEnumerable<List<T>> IndexAll(Func<DateTime, (int, int, int)> getCount, Func<long, long, DateTime, List<T>> getData)
{
    var lastIndexed = WebstudioDbContext.WebstudioIndex
        .Where(r => r.IndexName == Wrapper.IndexName)
        .Select(r => r.LastModified)
        .FirstOrDefault();

    var (count, max, min) = getCount(lastIndexed);

    Log.Debug($"Index: {IndexName}, Count {count}, Max: {max}, Min: {min}");

    if (count != 0)
    {
        // The step is never smaller than QueryLimit; a zero or negative quotient ends up at QueryLimit too.
        var step = Math.Max((max - min + 1) / count, QueryLimit);

        for (var from = min; from <= max; from += step)
        {
            yield return getData(from, step, lastIndexed);
        }
    }

    var state = new DbWebstudioIndex()
    {
        IndexName = Wrapper.IndexName,
        LastModified = DateTime.UtcNow
    };
    WebstudioDbContext.AddOrUpdate(r => r.WebstudioIndex, state);
    WebstudioDbContext.SaveChanges();

    Log.Debug($"index completed {Wrapper.IndexName}");
}
2020-01-24 13:07:51 +00:00
}
/// <summary>String helper for turning member names into lower camel case.</summary>
static class CamelCaseExtension
{
    /// <summary>
    /// Returns <paramref name="str"/> with its first character lower-cased (invariant culture);
    /// the remaining characters are left untouched.
    /// </summary>
    /// <param name="str">Text to convert.</param>
    /// <returns>
    /// The converted text. A <c>null</c> or empty input is returned as-is instead of throwing,
    /// and only the first character is lower-cased rather than a copy of the whole string.
    /// </returns>
    internal static string ToLowerCamelCase(this string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        return char.ToLowerInvariant(str[0]) + str.Substring(1);
    }
}
/// <summary>Kind of change applied to a list-valued field by a scripted update.</summary>
public enum UpdateAction
{
    /// <summary>Add the given values to the list.</summary>
    Add = 0,

    /// <summary>Replace the list with the given values.</summary>
    Replace = 1,

    /// <summary>Remove the given values from the list.</summary>
    Remove = 2
}
/// <summary>Dependency-injection registration helpers for the indexer types.</summary>
public static class BaseIndexerExtention
{
    /// <summary>Registers <see cref="BaseIndexerHelper"/> as a singleton and then calls <c>AddKafkaService</c>.</summary>
    /// <param name="services">Registration helper.</param>
    /// <returns>The value returned by <c>AddKafkaService</c>.</returns>
    public static DIHelper AddBaseIndexerHelperService(this DIHelper services)
    {
        services.TryAddSingleton<BaseIndexerHelper>();

        return services.AddKafkaService();
    }

    /// <summary>Registers <see cref="BaseIndexer{T}"/> as scoped and then chains the other registration helpers listed in the body.</summary>
    /// <param name="services">Registration helper.</param>
    /// <returns>The helper returned by the last registration in the chain.</returns>
    public static DIHelper AddBaseIndexerService<T>(this DIHelper services) where T : class, ISearchItem
    {
        services.TryAddScoped<BaseIndexer<T>>();

        var withDependencies = services
            .AddFactoryIndexerService()
            .AddClientService()
            .AddWebstudioDbContextService()
            .AddTenantManagerService()
            .AddSearchSettingsHelperService();

        return withDependencies.AddBaseIndexerHelperService();
    }
}
2020-01-24 13:07:51 +00:00
}