
648 lines
24 KiB
Raw Normal View History

2020-01-24 13:07:51 +00:00
* (c) Copyright Ascensio System Limited 2010-2018
* This program is freeware. You can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) version 3 as published by the Free Software Foundation (https://www.gnu.org/copyleft/gpl.html).
* In accordance with Section 7(a) of the GNU GPL its Section 15 shall be amended to the effect that
* Ascensio System SIA expressly excludes the warranty of non-infringement of any third-party rights.
* You can contact Ascensio System SIA by email at sales@onlyoffice.com
* The interactive user interfaces in modified source and object code versions of ONLYOFFICE must display
* Appropriate Legal Notices, as required under Section 5 of the GNU GPL version 3.
* Pursuant to Section 7 § 3(b) of the GNU GPL you must retain the original ONLYOFFICE logo which contains
* relevant author attributions when distributing the software. If the display of the logo in its graphic
* form is not reasonably feasible for technical reasons, you must include the words "Powered by ONLYOFFICE"
* in every copy of the program you distribute.
* Pursuant to Section 7 § 3(e) we decline to grant you any rights under trademark law for use of our trademarks.
using System;
using System.Collections;
2020-02-17 08:58:14 +00:00
using System.Collections.Concurrent;
2020-01-24 13:07:51 +00:00
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using System.Text;
2020-04-23 09:38:50 +00:00
using System.Threading.Tasks;
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
using ASC.Common;
2020-01-24 13:07:51 +00:00
using ASC.Common.Caching;
using ASC.Common.Logging;
2020-02-17 08:58:14 +00:00
using ASC.Core;
using ASC.Core.Common.EF;
using ASC.Core.Common.EF.Context;
2020-04-27 16:59:52 +00:00
using ASC.Core.Common.EF.Model;
2020-01-24 13:07:51 +00:00
using ASC.ElasticSearch.Core;
using ASC.ElasticSearch.Service;
2020-02-17 08:58:14 +00:00
using Autofac;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
2020-01-24 13:07:51 +00:00
using Nest;
namespace ASC.ElasticSearch
2020-02-17 08:58:14 +00:00
/// <summary>
/// Holds a per-index-name "exists" flag dictionary and resets an entry to <c>false</c>
/// whenever a <see cref="SearchItem"/> cache notification for that name is received.
/// </summary>
public class BaseIndexerHelper
{
    private readonly ICacheNotify<SearchItem> Notify;

    /// <summary>Index name mapped to the last known "exists" flag.</summary>
    public ConcurrentDictionary<string, bool> IsExist { get; set; }

    /// <summary>
    /// Creates the flag dictionary and subscribes to notifications with <c>CacheNotifyAction.Any</c>.
    /// </summary>
    public BaseIndexerHelper(ICacheNotify<SearchItem> cacheNotify)
    {
        IsExist = new ConcurrentDictionary<string, bool>();
        Notify = cacheNotify;

        // Every received item forces the flag stored under its Id to false.
        Notify.Subscribe(
            item => { IsExist.AddOrUpdate(item.Id, false, (name, current) => false); },
            CacheNotifyAction.Any);
    }

    /// <summary>
    /// Publishes a notification carrying the index name of <paramref name="t"/>.
    /// </summary>
    public void Clear<T>(T t) where T : class, ISearchItem
    {
        var message = new SearchItem() { Id = t.IndexName };
        Notify.Publish(message, CacheNotifyAction.Any);
    }
}
2020-04-27 16:59:52 +00:00
public class BaseIndexer<T> where T : class, ISearchItem
2020-01-24 13:07:51 +00:00
// Page size floor used by IndexAll when stepping through id ranges.
private const int QueryLimit = 1000;

// Shared gate for the lock statements in this class.
private static readonly object Locker = new object();

// Resolved from the service provider on every access.
protected internal T Wrapper => ServiceProvider.GetService<T>();

// Index name reported by the resolved wrapper instance.
public string IndexName => Wrapper.IndexName;

public bool IsExist { get; set; }

// Collaborators assigned once in the constructor.
public Client Client { get; }
public ILog Log { get; }
public TenantManager TenantManager { get; }
public SearchSettingsHelper SearchSettingsHelper { get; }
public BaseIndexerHelper BaseIndexerHelper { get; }
public Settings Settings { get; }
public IServiceProvider ServiceProvider { get; }
public WebstudioDbContext WebstudioDbContext { get; }
2020-02-17 08:58:14 +00:00
/// <summary>
/// Stores the injected collaborators; the logger is taken from <c>log.CurrentValue</c>
/// and the database context from <c>dbContextManager.Value</c>.
/// </summary>
public BaseIndexer(
    Client client,
    IOptionsMonitor<ILog> log,
    DbContextManager<WebstudioDbContext> dbContextManager,
    TenantManager tenantManager,
    SearchSettingsHelper searchSettingsHelper,
    BaseIndexerHelper baseIndexerHelper,
    Settings settings,
    IServiceProvider serviceProvider)
{
    // Plain references.
    Client = client;

    // Value unwrapped from the options monitor.
    Log = log.CurrentValue;

    // Remaining plain references.
    TenantManager = tenantManager;
    SearchSettingsHelper = searchSettingsHelper;
    BaseIndexerHelper = baseIndexerHelper;
    Settings = settings;
    ServiceProvider = serviceProvider;

    // Value unwrapped from the context manager.
    WebstudioDbContext = dbContextManager.Value;
}
2020-01-24 13:07:51 +00:00
/// <summary>
/// Indexes a single item; the request is configured by <c>GetMeta</c>.
/// </summary>
internal void Index(T data, bool immediately = true)
    => Client.Instance.Index(data, descriptor => GetMeta(descriptor, data, immediately));
2020-01-24 13:07:51 +00:00
/// <summary>
/// Indexes a list of items. When the first element is an ISearchItemDocument the list is
/// walked item by item and sent in bulk portions sized against Settings.MemoryLimit using
/// the length of each document's Data.
/// </summary>
// NOTE(review): block delimiters, the statements that add items to `portion`, and the bodies
// of some branches are not visible in this view — confirm against the full file.
internal void Index(List<T> data, bool immediately = true)
2020-02-17 08:58:14 +00:00
2020-04-28 15:11:10 +00:00
// Nothing to do for an empty list.
if (!data.Any()) return;
2020-02-17 08:58:14 +00:00
2020-04-28 15:11:10 +00:00
// The type of the first element decides which strategy is used.
if (data[0] is ISearchItemDocument)
2020-01-24 13:07:51 +00:00
var currentLength = 0L;
var portion = new List<T>();
var portionStart = 0;
for (var i = 0; i < data.Count; i++)
var t = data[i];
// The last item always triggers a bulk request.
var runBulk = i == data.Count - 1;
// Item carries no document content. NOTE(review): the guarded statement is not visible here.
if (!(t is ISearchItemDocument wwd) || wwd.Document == null || string.IsNullOrEmpty(wwd.Document.Data))
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
var dLength = wwd.Document.Data.Length;
2020-05-03 15:50:40 +00:00
// A document at or above the limit is indexed on its own via the single-item overload.
if (dLength >= Settings.MemoryLimit)
2020-02-17 08:58:14 +00:00
Index(t, immediately);
// NOTE(review): the handler body is not visible in this view.
catch (Exception e)
2020-01-24 13:07:51 +00:00
// Drop the references to the document payload.
wwd.Document.Data = null;
wwd.Document = null;
2020-05-03 15:50:40 +00:00
// The document still fits next to what has been accumulated so far.
if (currentLength + dLength < Settings.MemoryLimit)
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
currentLength += dLength;
2020-01-24 13:07:51 +00:00
2020-02-17 08:58:14 +00:00
2020-01-24 13:07:51 +00:00
// NOTE(review): presumably the branch taken when the portion would exceed the limit — TODO confirm.
runBulk = true;
if (runBulk)
2020-02-17 08:58:14 +00:00
var portion1 = portion;
2020-01-24 13:07:51 +00:00
// Send the accumulated portion in one bulk request.
Client.Instance.Bulk(r => r.IndexMany(portion1, GetMeta));
// Release the payloads of the items from portionStart up to, but not including, i.
for (var j = portionStart; j < i; j++)
2020-04-28 15:11:10 +00:00
if (data[j] is ISearchItemDocument doc && doc.Document != null)
2020-01-24 13:07:51 +00:00
doc.Document.Data = null;
doc.Document = null;
2020-02-17 08:58:14 +00:00
2020-01-24 13:07:51 +00:00
// Start a fresh portion.
portionStart = i;
portion = new List<T>();
currentLength = 0L;
// Bulk-index the whole list in a single request.
// NOTE(review): presumably the branch for lists whose first element is not an ISearchItemDocument — the `else` is not visible.
Client.Instance.Bulk(r => r.IndexMany(data, GetMeta));
2020-02-17 08:58:14 +00:00
2020-01-24 13:07:51 +00:00
/// <summary>
/// Updates the document identified by <paramref name="data"/>; the request is configured
/// by <c>GetMetaForUpdate</c> from the given field expressions.
/// </summary>
internal void Update(T data, bool immediately = true, params Expression<Func<T, object>>[] fields)
    => Client.Instance.Update(
        DocumentPath<T>.Id(data),
        descriptor => GetMetaForUpdate(descriptor, data, immediately, fields));
/// <summary>
/// Updates a list field of the document identified by <paramref name="data"/> using the
/// given <paramref name="action"/>; the request is configured by <c>GetMetaForUpdate</c>.
/// </summary>
internal void Update(T data, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
    => Client.Instance.Update(
        DocumentPath<T>.Id(data),
        descriptor => GetMetaForUpdate(descriptor, data, action, fields, immediately));
/// <summary>
/// Runs an update-by-query built by <c>GetDescriptorForUpdate</c> from the selector
/// expression, tenant id and field expressions.
/// </summary>
internal void Update(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true, params Expression<Func<T, object>>[] fields)
{
    var request = GetDescriptorForUpdate(data, expression, tenantId, immediately, fields);
    Client.Instance.UpdateByQuery(request);
}
/// <summary>
/// Runs an update-by-query built by <c>GetDescriptorForUpdate</c> that applies
/// <paramref name="action"/> to the list field selected by <paramref name="fields"/>.
/// </summary>
internal void Update(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
{
    var request = GetDescriptorForUpdate(data, expression, tenantId, action, fields, immediately);
    Client.Instance.UpdateByQuery(request);
}
/// <summary>
/// Deletes the document for <paramref name="data"/>; the request is configured by
/// <c>GetMetaForDelete</c>.
/// </summary>
internal void Delete(T data, bool immediately = true)
    => Client.Instance.Delete<T>(data, descriptor => GetMetaForDelete(descriptor, immediately));
/// <summary>
/// Runs a delete-by-query built by <c>GetDescriptorForDelete</c> from the selector
/// expression and tenant id.
/// </summary>
internal void Delete(Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true)
{
    var request = GetDescriptorForDelete(expression, tenantId, immediately);
    Client.Instance.DeleteByQuery(request);
}
/// <summary>Issues a flush request for the index named <c>IndexName</c>.</summary>
public void Flush()
{
    var request = new FlushRequest(IndexName);
    Client.Instance.Indices.Flush(request);
}
2020-01-24 13:07:51 +00:00
/// <summary>Issues a refresh request for the index named <c>IndexName</c>.</summary>
public void Refresh()
{
    var request = new RefreshRequest(IndexName);
    Client.Instance.Indices.Refresh(request);
}
2020-01-24 13:07:51 +00:00
/// <summary>
/// Reports whether the index named by <paramref name="data"/> exists, using the shared
/// flag dictionary of <c>BaseIndexerHelper</c> as a cache in front of the search client.
/// </summary>
/// <param name="data">Item whose <c>IndexName</c> is checked.</param>
/// <returns>
/// <c>true</c> when the cache or the client reports the index as existing; <c>false</c>
/// otherwise, including when the check throws (the exception is logged).
/// </returns>
internal bool CheckExist(T data)
{
    try
    {
        var indexName = data.IndexName;

        var isExist = BaseIndexerHelper.IsExist.GetOrAdd(indexName, k => Client.Instance.Indices.Exists(k).Exists);
        if (isExist) return true;

        lock (Locker)
        {
            // Re-read the shared cache: the local flag cannot have changed while waiting
            // for the lock, but another thread may have stored a positive result.
            if (BaseIndexerHelper.IsExist.TryGetValue(indexName, out var cached) && cached) return true;

            isExist = Client.Instance.Indices.Exists(indexName).Exists;

            // Store the freshly queried value (not the instance-level IsExist property),
            // replacing only a cached negative.
            _ = BaseIndexerHelper.IsExist.TryUpdate(indexName, isExist, false);

            if (isExist) return true;
        }
    }
    catch (Exception e)
    {
        Log.Error("CheckExist " + data.IndexName, e);
    }

    return false;
}
2020-04-23 09:38:50 +00:00
/// <summary>Asynchronous re-index entry point.</summary>
// NOTE(review): the only body line visible in this view is the commented-out call below;
// any active statements of this method are not shown here — confirm against the full file.
public async Task ReIndex()
2020-01-24 13:07:51 +00:00
//((IIndexer) this).IndexAll();
// Looks up the WebstudioIndex row whose IndexName equals Wrapper.IndexName and, following the
// null check, writes a "Delete <index name>" debug message.
// NOTE(review): no statement that actually removes the row or the index is visible in this
// view, and it is not visible which statements the null check guards — confirm against the full file.
private void Clear()
2020-02-17 08:58:14 +00:00
var index = WebstudioDbContext.WebstudioIndex.Where(r => r.IndexName == Wrapper.IndexName).FirstOrDefault();
if (index != null)
2020-01-24 13:07:51 +00:00
Log.DebugFormat("Delete {0}", Wrapper.IndexName);
2020-04-23 17:24:56 +00:00
2020-01-24 13:07:51 +00:00
/// <summary>
/// Runs a search built from <paramref name="expression"/>, restricted to the current
/// tenant, and returns the matching documents.
/// </summary>
internal IReadOnlyCollection<T> Select(Expression<Func<Selector<T>, Selector<T>>> expression, bool onlyId = false)
{
    var build = expression.Compile();
    var blank = ServiceProvider.GetService<Selector<T>>();

    // Apply the caller's criteria first, then resolve the tenant and add the tenant filter.
    var configured = build(blank);
    var tenantId = TenantManager.GetCurrentTenant().TenantId;
    var scoped = configured.Where(r => r.TenantId, tenantId);

    var response = Client.Instance.Search(scoped.GetDescriptor(this, onlyId));
    return response.Documents;
}
/// <summary>
/// Runs a search built from <paramref name="expression"/>, restricted to the current
/// tenant; returns the matching documents and reports the response total through
/// <paramref name="total"/>.
/// </summary>
internal IReadOnlyCollection<T> Select(Expression<Func<Selector<T>, Selector<T>>> expression, bool onlyId, out long total)
{
    var build = expression.Compile();
    var blank = ServiceProvider.GetService<Selector<T>>();

    // Apply the caller's criteria first, then resolve the tenant and add the tenant filter.
    var configured = build(blank);
    var tenantId = TenantManager.GetCurrentTenant().TenantId;
    var scoped = configured.Where(r => r.TenantId, tenantId);

    var response = Client.Instance.Search(scoped.GetDescriptor(this, onlyId));
    total = response.Total;
    return response.Documents;
}
/// <summary>
/// Creates the index named by <paramref name="data"/> when CheckExist reports it as missing,
/// registering custom analyzers and character filters, then sets IsExist to true.
/// Failures are logged under the "CreateIfNotExist" message.
/// </summary>
// NOTE(review): block delimiters and several tokens inside the expressions below (arguments of
// CharFilters(...), the operand compared with `c`, the first list element, the first Mapping
// argument, and the receiver before `.CharFilters(d => ...)`) are not visible in this view —
// confirm against the full file.
public void CreateIfNotExist(T data)
// Fast path: nothing to create.
if (CheckExist(data)) return;
lock (Locker)
// Local function that registers the custom analyzers on the descriptor and returns it.
IPromise<IAnalyzers> analyzers(AnalyzersDescriptor b)
// One "<name>custom" analyzer per Analyzer enum name, using that name as tokenizer.
foreach (var c in Enum.GetNames(typeof(Analyzer)))
var c1 = c;
b.Custom(c1 + "custom", ca => ca.Tokenizer(c1).Filters(Filter.lowercase.ToString()).CharFilters(;
// One "<name>custom" analyzer per CharFilter enum name (one name is skipped), using the whitespace tokenizer.
foreach (var c in Enum.GetNames(typeof(CharFilter)))
if (c == continue;
var charFilters = new List<string>() {, c };
var c1 = c;
b.Custom(c1 + "custom", ca => ca.Tokenizer(Analyzer.whitespace.ToString()).Filters(Filter.lowercase.ToString()).CharFilters(charFilters));
// Items that carry a document get an additional "document" analyzer.
if (data is ISearchItemDocument)
b.Custom("document", ca => ca.Tokenizer(Analyzer.whitespace.ToString()).Filters(Filter.lowercase.ToString()).CharFilters(;
return b;
// Create the index with an auto-generated mapping for T plus the analysis settings.
var createIndexResponse = Client.Instance.Indices.Create(data.IndexName,
c =>
c.Map<T>(m => m.AutoMap())
.Settings(r => r.Analysis(a =>
// Char filters: an HTML-strip filter and a mapping filter that folds "ё"/"Ё" to "е"/"Е".
.CharFilters(d => d.HtmlStrip(CharFilter.html.ToString())
.Mapping(, m => m.Mappings("ё => е", "Ё => Е"))))));
IsExist = true;
catch (Exception e)
Log.Error("CreateIfNotExist", e);
2020-01-24 13:07:51 +00:00
// Configures a single-document index request: targets data.IndexName and uses data.Id as
// the document id, then returns the configured request.
// NOTE(review): the statements guarded by the two conditions below are not visible in this
// view — confirm against the full file.
private IIndexRequest<T> GetMeta(IndexDescriptor<T> request, T data, bool immediately = true)
2020-02-19 08:37:52 +00:00
var result = request.Index(data.IndexName).Id(data.Id);
2020-01-24 13:07:51 +00:00
if (immediately)
if (data is ISearchItemDocument)
2020-01-24 13:07:51 +00:00
return result;
// Configures one bulk index operation: targets IndexName and uses data.Id as the document id,
// then returns the configured operation.
// NOTE(review): the statement guarded by the condition below is not visible in this view.
private IBulkIndexOperation<T> GetMeta(BulkIndexDescriptor<T> desc, T data)
var result = desc.Index(IndexName).Id(data.Id);
if (data is ISearchItemDocument)
2020-01-24 13:07:51 +00:00
return result;
// Configures a single-document update request against IndexName. When field expressions are
// supplied, the update script produced by GetScriptUpdateByQuery is attached.
// NOTE(review): the statement guarded by `if (immediately)` is not visible in this view.
private IUpdateRequest<T, T> GetMetaForUpdate(UpdateDescriptor<T, T> request, T data, bool immediately = true, params Expression<Func<T, object>>[] fields)
var result = request.Index(IndexName);
if (fields.Any())
result.Script(GetScriptUpdateByQuery(data, fields));
if (immediately)
return result;
/// <summary>
/// Builds an update script from the given field expressions: each expression is evaluated
/// against <paramref name="data"/> and turned into a script fragment addressed by the
/// lower-camel-cased member path of the expression.
/// </summary>
/// <returns>A delegate that applies the script source and its parameters to a descriptor.</returns>
private Func<ScriptDescriptor, IScript> GetScriptUpdateByQuery(T data, params Expression<Func<T, object>>[] fields)
{
    var script = new StringBuilder();
    var scriptParams = new Dictionary<string, object>();

    foreach (var field in fields)
    {
        var value = field.Compile()(data);

        var node = field.Body;
        var targetsList = node.Type.IsGenericType && node.Type.GetGenericTypeDefinition() == typeof(List<>);

        // Walk the member chain outwards-in, producing ".a.b.c".
        var path = "";
        while (true)
        {
            var segment = TryGetName(node, out var member);
            if (string.IsNullOrEmpty(segment)) break;

            path = "." + segment + path;
            node = member.Expression;
        }

        if (targetsList)
        {
            // List-typed members are merged element by element.
            UpdateByAction(UpdateAction.Add, (IList)value, path, scriptParams, script);
        }
        else if (value == default(T))
        {
            // No value: drop the field from the stored source.
            script.AppendFormat("ctx._source.remove('{0}');", path.Substring(1));
        }
        else
        {
            var pkey = "p" + path.Replace(".", "");
            script.AppendFormat("ctx._source{0} = params.{1};", path, pkey);
            scriptParams.Add(pkey, value);
        }
    }

    var sourceData = script.ToString();
    return r => r.Source(sourceData).Params(scriptParams);
}
// Configures a single-document update request against IndexName with the list-update script
// produced by GetScriptForUpdate for the given action and field.
// NOTE(review): the statement guarded by `if (immediately)` is not visible in this view.
private IUpdateRequest<T, T> GetMetaForUpdate(UpdateDescriptor<T, T> request, T data, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
var result = request.Index(IndexName).Script(GetScriptForUpdate(data, action, fields));
if (immediately)
return result;
/// <summary>
/// Builds an update script that applies <paramref name="action"/> to the list member selected
/// by <paramref name="fields"/>, using the list read from <paramref name="data"/>.
/// </summary>
/// <returns>A delegate that applies the script source and its parameters to a descriptor.</returns>
private Func<ScriptDescriptor, IScript> GetScriptForUpdate(T data, UpdateAction action, Expression<Func<T, IList>> fields)
{
    var script = new StringBuilder();
    var values = fields.Compile()(data);

    // Walk the member chain outwards-in, producing ".a.b.c".
    var path = "";
    var node = fields.Body;
    while (true)
    {
        var segment = TryGetName(node, out var member);
        if (string.IsNullOrEmpty(segment)) break;

        path = "." + segment + path;
        node = member.Expression;
    }

    var scriptParams = new Dictionary<string, object>();
    UpdateByAction(action, values, path, scriptParams, script);

    var sourceData = script.ToString();
    return r => r.Source(sourceData).Params(scriptParams);
}
/// <summary>
/// Appends to <paramref name="source"/> the script fragment(s) that apply
/// <paramref name="action"/> to the list stored under <paramref name="key"/>, and registers
/// the referenced values in <paramref name="parameters"/>.
/// </summary>
/// <param name="action">Add, Replace or Remove.</param>
/// <param name="newValue">Values to add or remove, or the replacement list.</param>
/// <param name="key">Dotted source path with a leading dot (for example ".tags").</param>
/// <param name="parameters">Script parameters; keys are "p" + the path without dots (+ element index).</param>
/// <param name="source">Script text being built.</param>
/// <exception cref="ArgumentOutOfRangeException">The action is not one of the handled values.</exception>
private void UpdateByAction(UpdateAction action, IList newValue, string key, Dictionary<string, object> parameters, StringBuilder source)
{
    var paramKey = "p" + key.Replace(".", "");

    switch (action)
    {
        case UpdateAction.Add:
            // Add each element only when the stored list does not contain it yet.
            for (var i = 0; i < newValue.Count; i++)
            {
                parameters.Add(paramKey + i, newValue[i]);
                source.AppendFormat("if (!ctx._source{0}.contains(params.{1})){{ctx._source{0}.add(params.{1})}}", key, paramKey + i);
            }
            break;

        case UpdateAction.Replace:
            // The whole list is passed as one parameter.
            parameters.Add(paramKey, newValue);
            source.AppendFormat("ctx._source{0} = params.{1};", key, paramKey);
            break;

        case UpdateAction.Remove:
            // Remove stored elements whose id matches the id of the given element.
            // NOTE(review): consecutive removeIf calls are emitted without a separator, as
            // before — verify the script is valid when more than one element is removed.
            for (var i = 0; i < newValue.Count; i++)
            {
                parameters.Add(paramKey + i, newValue[i]);
                source.AppendFormat("ctx._source{0}.removeIf(item -> item.id == params.{1}.id)", key, paramKey + i);
            }
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(action), action, null);
    }
}
/// <summary>
/// Extracts the member access from <paramref name="expr"/> — either the expression itself
/// or the operand of a unary expression — and returns its name in lower camel case.
/// </summary>
/// <param name="expr">Expression to inspect.</param>
/// <param name="member">The member access found, or <c>null</c>.</param>
/// <returns>The lower-camel-cased member name, or an empty string when none was found.</returns>
private string TryGetName(Expression expr, out MemberExpression member)
{
    member = expr as MemberExpression;

    // Look through one unary wrapper when the expression is not a member access itself.
    if (member == null && expr is UnaryExpression wrapper)
    {
        member = wrapper.Operand as MemberExpression;
    }

    if (member != null)
    {
        return member.Member.Name.ToLowerCamelCase();
    }

    return "";
}
// Configures a single-document delete request against IndexName and returns it.
// NOTE(review): the statement guarded by `if (immediately)` is not visible in this view.
private IDeleteRequest GetMetaForDelete(DeleteDescriptor<T> request, bool immediately = true)
var result = request.Index(IndexName);
if (immediately)
return result;
/// <summary>
/// Builds the delete-by-query descriptor: applies <paramref name="expression"/> to a
/// selector resolved from the service provider and adds the tenant filter.
/// </summary>
private Func<DeleteByQueryDescriptor<T>, IDeleteByQueryRequest> GetDescriptorForDelete(Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true)
{
    var build = expression.Compile();
    var blank = ServiceProvider.GetService<Selector<T>>();
    var scoped = build(blank).Where(r => r.TenantId, tenantId);

    return scoped.GetDescriptorForDelete(this, immediately);
}
/// <summary>
/// Builds the update-by-query descriptor: applies <paramref name="expression"/> to a
/// selector resolved from the service provider, adds the tenant filter and attaches the
/// script produced by <c>GetScriptUpdateByQuery</c>.
/// </summary>
private Func<UpdateByQueryDescriptor<T>, IUpdateByQueryRequest> GetDescriptorForUpdate(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, bool immediately = true, params Expression<Func<T, object>>[] fields)
{
    var build = expression.Compile();
    var blank = ServiceProvider.GetService<Selector<T>>();
    var scoped = build(blank).Where(r => r.TenantId, tenantId);

    var script = GetScriptUpdateByQuery(data, fields);
    return scoped.GetDescriptorForUpdate(this, script, immediately);
}
/// <summary>
/// Builds the update-by-query descriptor: applies <paramref name="expression"/> to a
/// selector resolved from the service provider, adds the tenant filter and attaches the
/// list-update script produced by <c>GetScriptForUpdate</c>.
/// </summary>
private Func<UpdateByQueryDescriptor<T>, IUpdateByQueryRequest> GetDescriptorForUpdate(T data, Expression<Func<Selector<T>, Selector<T>>> expression, int tenantId, UpdateAction action, Expression<Func<T, IList>> fields, bool immediately = true)
{
    var build = expression.Compile();
    var blank = ServiceProvider.GetService<Selector<T>>();
    var scoped = build(blank).Where(r => r.TenantId, tenantId);

    var script = GetScriptForUpdate(data, action, fields);
    return scoped.GetDescriptorForUpdate(this, script, immediately);
}
2020-04-23 09:38:50 +00:00
2020-04-27 16:59:52 +00:00
/// <summary>
/// Iterator that yields batches of items to index. It reads LastModified for this index from
/// WebstudioIndex, asks <paramref name="getCount"/> for (count, max, min) since that value, and
/// calls <paramref name="getData"/> for successive ranges between min and max. Afterwards the
/// WebstudioIndex entry is written with the current UTC time and completion is logged.
/// </summary>
// NOTE(review): block delimiters, the terminating call of the LINQ query, and the end of the
// AddOrUpdate statement are not visible in this view — confirm against the full file.
public IEnumerable<List<T>> IndexAll(Func<DateTime, (int, int, int)> getCount, Func<long, long, DateTime, List<T>> getData)
2020-04-27 16:59:52 +00:00
// Timestamp stored for this index name.
var lastIndexed = WebstudioDbContext.WebstudioIndex
.Where(r => r.IndexName == Wrapper.IndexName)
.Select(r => r.LastModified)
var (count, max, min) = getCount(lastIndexed);
Log.Debug($"Index: {IndexName}, Count {count}, Max: {max}, Min: {min}");
if (count != 0)
// Range width per batch; raised to at least QueryLimit below.
var step = (max - min + 1) / count;
if (step == 0)
step = 1;
if (step < QueryLimit)
step = QueryLimit;
// Each yielded batch is produced by getData(start, step, lastIndexed).
for (var i = min; i <= max; i += step)
yield return getData(i, step, lastIndexed);
// Record the index name with the current UTC time.
WebstudioDbContext.AddOrUpdate(r => r.WebstudioIndex, new DbWebstudioIndex()
2020-04-29 14:11:31 +00:00
IndexName = Wrapper.IndexName,
LastModified = DateTime.UtcNow
2020-04-27 16:59:52 +00:00
2020-04-27 16:59:52 +00:00
Log.Debug($"index completed {Wrapper.IndexName}");
2020-01-24 13:07:51 +00:00
/// <summary>String helper used to turn member names into lower-camel-cased field names.</summary>
static class CamelCaseExtension
{
    /// <summary>
    /// Returns <paramref name="str"/> with its first character lower-cased using the
    /// invariant culture; the rest of the string is left untouched.
    /// </summary>
    /// <param name="str">Text to convert.</param>
    /// <returns>
    /// The converted text; <c>null</c> or an empty string is returned unchanged instead of
    /// throwing.
    /// </returns>
    internal static string ToLowerCamelCase(this string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        // Only the first character needs converting; no lower-cased copy of the whole string.
        return char.ToLowerInvariant(str[0]) + str.Substring(1);
    }
}
// Kind of modification applied to a list field by the update scripts.
// NOTE(review): the member list is not visible in this view; UpdateByAction switches on
// Add, Replace and Remove.
public enum UpdateAction
2020-02-17 08:58:14 +00:00
public static class BaseIndexerExtention
public static DIHelper AddBaseIndexerHelperService(this DIHelper services)
return services.AddKafkaService();
public static DIHelper AddBaseIndexerService<T>(this DIHelper services) where T : class, ISearchItem
2020-02-17 08:58:14 +00:00
return services
2020-01-24 13:07:51 +00:00