View Javadoc
1   /*
2    * Copyright 2012-2020 CodeLibs Project and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package org.codelibs.fess.helper;
17  
18  import java.util.ArrayList;
19  import java.util.List;
20  import java.util.Map;
21  
22  import org.apache.logging.log4j.LogManager;
23  import org.apache.logging.log4j.Logger;
24  import org.codelibs.fess.es.client.FessEsClient;
25  import org.codelibs.fess.mylasta.direction.FessConfig;
26  import org.codelibs.fess.thumbnail.ThumbnailManager;
27  import org.codelibs.fess.util.ComponentUtil;
28  import org.codelibs.fess.util.DocList;
29  import org.codelibs.fess.util.MemoryUtil;
30  import org.elasticsearch.action.search.SearchResponse;
31  import org.elasticsearch.index.query.QueryBuilder;
32  import org.elasticsearch.index.query.QueryBuilders;
33  
34  public class IndexingHelper {
35      private static final Logger logger = LogManager.getLogger(IndexingHelper.class);
36  
37      protected int maxRetryCount = 5;
38  
39      protected int defaultRowSize = 100;
40  
41      protected long requestInterval = 500;
42  
43      public void sendDocuments(final FessEsClient fessEsClient, final DocList docList) {
44          if (docList.isEmpty()) {
45              return;
46          }
47          final FessConfig fessConfig = ComponentUtil.getFessConfig();
48          if (fessConfig.isResultCollapsed()) {
49              docList.forEach(doc -> {
50                  doc.put("content_minhash", doc.get(fessConfig.getIndexFieldContent()));
51              });
52          }
53          final long execTime = System.currentTimeMillis();
54          if (logger.isDebugEnabled()) {
55              logger.debug("Sending {} documents to a server.", docList.size());
56          }
57          try {
58              if (fessConfig.isThumbnailCrawlerEnabled()) {
59                  final ThumbnailManager thumbnailManager = ComponentUtil.getThumbnailManager();
60                  docList.stream().forEach(
61                          doc -> {
62                              if (!thumbnailManager.offer(doc)) {
63                                  if (logger.isDebugEnabled()) {
64                                      logger.debug("Removing {} from {}", doc.get(fessConfig.getIndexFieldThumbnail()),
65                                              doc.get(fessConfig.getIndexFieldUrl()));
66                                  }
67                                  doc.remove(fessConfig.getIndexFieldThumbnail());
68                              }
69                          });
70              }
71              final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
72              synchronized (fessEsClient) {
73                  deleteOldDocuments(fessEsClient, docList);
74                  fessEsClient.addAll(fessConfig.getIndexDocumentUpdateIndex(), docList, (doc, builder) -> {
75                      final String configId = (String) doc.get(fessConfig.getIndexFieldConfigId());
76                      crawlingConfigHelper.getPipeline(configId).ifPresent(s -> builder.setPipeline(s));
77                  });
78              }
79              if (logger.isInfoEnabled()) {
80                  if (docList.getContentSize() > 0) {
81                      logger.info("Sent {} docs (Doc:{process {}ms, send {}ms, size {}}, {})", docList.size(), docList.getProcessingTime(),
82                              (System.currentTimeMillis() - execTime), MemoryUtil.byteCountToDisplaySize(docList.getContentSize()),
83                              MemoryUtil.getMemoryUsageLog());
84                  } else {
85                      logger.info("Sent {}  docs (Doc:{send {}ms}, {})", docList.size(), (System.currentTimeMillis() - execTime),
86                              MemoryUtil.getMemoryUsageLog());
87                  }
88              }
89          } finally {
90              docList.clear();
91          }
92      }
93  
94      private void deleteOldDocuments(final FessEsClient fessEsClient, final DocList docList) {
95          final FessConfig fessConfig = ComponentUtil.getFessConfig();
96  
97          final List<String> docIdList = new ArrayList<>();
98          for (final Map<String, Object> inputDoc : docList) {
99              final Object idValue = inputDoc.get(fessConfig.getIndexFieldId());
100             if (idValue == null) {
101                 continue;
102             }
103 
104             final Object configIdValue = inputDoc.get(fessConfig.getIndexFieldConfigId());
105             if (configIdValue == null) {
106                 continue;
107             }
108 
109             final QueryBuilder queryBuilder =
110                     QueryBuilders.boolQuery()
111                             .must(QueryBuilders.termQuery(fessConfig.getIndexFieldUrl(), inputDoc.get(fessConfig.getIndexFieldUrl())))
112                             .filter(QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), configIdValue));
113 
114             final List<Map<String, Object>> docs =
115                     getDocumentListByQuery(fessEsClient, queryBuilder,
116                             new String[] { fessConfig.getIndexFieldId(), fessConfig.getIndexFieldDocId() });
117             for (final Map<String, Object> doc : docs) {
118                 final Object oldIdValue = doc.get(fessConfig.getIndexFieldId());
119                 if (!idValue.equals(oldIdValue) && oldIdValue != null) {
120                     final Object oldDocIdValue = doc.get(fessConfig.getIndexFieldDocId());
121                     if (oldDocIdValue != null) {
122                         docIdList.add(oldDocIdValue.toString());
123                     }
124                 }
125             }
126             if (logger.isDebugEnabled()) {
127                 logger.debug("{} => {}", queryBuilder, docs);
128             }
129         }
130         if (!docIdList.isEmpty()) {
131             fessEsClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
132                     QueryBuilders.idsQuery().addIds(docIdList.stream().toArray(n -> new String[n])));
133 
134         }
135     }
136 
137     public boolean updateDocument(final FessEsClient fessEsClient, final String id, final String field, final Object value) {
138         final FessConfig fessConfig = ComponentUtil.getFessConfig();
139         return fessEsClient.update(fessConfig.getIndexDocumentUpdateIndex(), id, field, value);
140     }
141 
142     public boolean deleteDocument(final FessEsClient fessEsClient, final String id) {
143         final FessConfig fessConfig = ComponentUtil.getFessConfig();
144         return fessEsClient.delete(fessConfig.getIndexDocumentUpdateIndex(), id);
145     }
146 
147     public long deleteDocumentByUrl(final FessEsClient fessEsClient, final String url) {
148         final FessConfig fessConfig = ComponentUtil.getFessConfig();
149         return fessEsClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
150                 QueryBuilders.termQuery(fessConfig.getIndexFieldUrl(), url));
151     }
152 
153     public long deleteDocumentsByDocId(final FessEsClient fessEsClient, final List<String> docIdList) {
154         final FessConfig fessConfig = ComponentUtil.getFessConfig();
155         return fessEsClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
156                 QueryBuilders.idsQuery().addIds(docIdList.stream().toArray(n -> new String[n])));
157     }
158 
159     public long deleteDocumentByQuery(final FessEsClient fessEsClient, final QueryBuilder queryBuilder) {
160         final FessConfig fessConfig = ComponentUtil.getFessConfig();
161         return fessEsClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(), queryBuilder);
162     }
163 
164     public Map<String, Object> getDocument(final FessEsClient fessEsClient, final String id, final String[] fields) {
165         final FessConfig fessConfig = ComponentUtil.getFessConfig();
166         return fessEsClient.getDocument(fessConfig.getIndexDocumentUpdateIndex(), builder -> {
167             builder.setQuery(QueryBuilders.idsQuery().addIds(id));
168             builder.setFetchSource(fields, null);
169             return true;
170         }).orElse(null);
171     }
172 
173     public List<Map<String, Object>> getDocumentListByPrefixId(final FessEsClient fessEsClient, final String id, final String[] fields) {
174         final FessConfig fessConfig = ComponentUtil.getFessConfig();
175         final QueryBuilder queryBuilder = QueryBuilders.prefixQuery(fessConfig.getIndexFieldId(), id);
176         return getDocumentListByQuery(fessEsClient, queryBuilder, fields);
177     }
178 
179     public void deleteChildDocument(final FessEsClient fessEsClient, final String id) {
180         final FessConfig fessConfig = ComponentUtil.getFessConfig();
181         fessEsClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
182                 QueryBuilders.termQuery(fessConfig.getIndexFieldParentId(), id));
183     }
184 
185     public List<Map<String, Object>> getChildDocumentList(final FessEsClient fessEsClient, final String id, final String[] fields) {
186         final FessConfig fessConfig = ComponentUtil.getFessConfig();
187         final QueryBuilder queryBuilder = QueryBuilders.termQuery(fessConfig.getIndexFieldParentId(), id);
188         return getDocumentListByQuery(fessEsClient, queryBuilder, fields);
189     }
190 
191     protected List<Map<String, Object>> getDocumentListByQuery(final FessEsClient fessEsClient, final QueryBuilder queryBuilder,
192             final String[] fields) {
193         final FessConfig fessConfig = ComponentUtil.getFessConfig();
194 
195         final SearchResponse countResponse =
196                 fessEsClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex()).setQuery(queryBuilder).setSize(0).execute()
197                         .actionGet(fessConfig.getIndexSearchTimeout());
198         final long numFound = countResponse.getHits().getTotalHits().value;
199         // TODO max threshold
200 
201         return fessEsClient.getDocumentList(fessConfig.getIndexDocumentUpdateIndex(), requestBuilder -> {
202             requestBuilder.setQuery(queryBuilder).setSize((int) numFound);
203             if (fields != null) {
204                 requestBuilder.setFetchSource(fields, null);
205             }
206             return true;
207         });
208 
209     }
210 
211     public long calculateDocumentSize(final Map<String, Object> dataMap) {
212         return MemoryUtil.sizeOf(dataMap);
213     }
214 
215     public void setMaxRetryCount(final int maxRetryCount) {
216         this.maxRetryCount = maxRetryCount;
217     }
218 
219     public void setDefaultRowSize(final int defaultRowSize) {
220         this.defaultRowSize = defaultRowSize;
221     }
222 
223     public void setRequestInterval(final long requestInterval) {
224         this.requestInterval = requestInterval;
225     }
226 
227 }