View Javadoc
1   /*
2    * Copyright 2012-2020 CodeLibs Project and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package org.codelibs.fess.ds.callback;
17  
18  import java.util.HashSet;
19  import java.util.Map;
20  import java.util.Set;
21  import java.util.concurrent.atomic.AtomicLong;
22  
23  import javax.annotation.PostConstruct;
24  
25  import org.apache.logging.log4j.LogManager;
26  import org.apache.logging.log4j.Logger;
27  import org.codelibs.core.stream.StreamUtil;
28  import org.codelibs.fess.es.client.FessEsClient;
29  import org.codelibs.fess.exception.DataStoreException;
30  import org.codelibs.fess.helper.CrawlingInfoHelper;
31  import org.codelibs.fess.helper.IndexingHelper;
32  import org.codelibs.fess.helper.SearchLogHelper;
33  import org.codelibs.fess.helper.SystemHelper;
34  import org.codelibs.fess.ingest.IngestFactory;
35  import org.codelibs.fess.ingest.Ingester;
36  import org.codelibs.fess.mylasta.direction.FessConfig;
37  import org.codelibs.fess.util.ComponentUtil;
38  import org.codelibs.fess.util.DocList;
39  import org.codelibs.fess.util.DocumentUtil;
40  import org.codelibs.fess.util.MemoryUtil;
41  
42  public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
43      private static final Logger logger = LogManager.getLogger(IndexUpdateCallbackImpl.class);
44  
45      protected AtomicLong documentSize = new AtomicLong(0);
46  
47      protected volatile long executeTime = 0;
48  
49      protected final DocListist.html#DocList">DocList docList = new DocList();
50  
51      protected long maxDocumentRequestSize;
52  
53      protected int maxDocumentCacheSize;
54  
55      private IngestFactory ingestFactory = null;
56  
57      @PostConstruct
58      public void init() {
59          if (logger.isDebugEnabled()) {
60              logger.debug("Initialize {}", this.getClass().getSimpleName());
61          }
62          maxDocumentRequestSize = Long.parseLong(ComponentUtil.getFessConfig().getIndexerDataMaxDocumentRequestSize());
63          maxDocumentCacheSize = ComponentUtil.getFessConfig().getIndexerDataMaxDocumentCacheSizeAsInteger();
64          if (ComponentUtil.hasIngestFactory()) {
65              ingestFactory = ComponentUtil.getIngestFactory();
66          }
67      }
68  
69      /* (non-Javadoc)
70       * @see org.codelibs.fess.ds.callback.IndexUpdateCallback#store(java.util.Map)
71       */
72      @Override
73      public void store(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
74          final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
75          systemHelper.calibrateCpuLoad();
76  
77          final long startTime = System.currentTimeMillis();
78          final FessConfig fessConfig = ComponentUtil.getFessConfig();
79          final FessEsClient fessEsClient = ComponentUtil.getFessEsClient();
80  
81          if (logger.isDebugEnabled()) {
82              logger.debug("Adding {}", dataMap);
83          }
84  
85          //   required check
86          final Object urlObj = dataMap.get(fessConfig.getIndexFieldUrl());
87          if (urlObj == null) {
88              throw new DataStoreException("url is null. dataMap=" + dataMap);
89          }
90  
91          final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
92          final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper();
93          dataMap.put(fessConfig.getIndexFieldId(), crawlingInfoHelper.generateId(dataMap));
94  
95          final String url = dataMap.get(fessConfig.getIndexFieldUrl()).toString();
96  
97          if (fessConfig.getIndexerClickCountEnabledAsBoolean()) {
98              addClickCountField(dataMap, url, fessConfig.getIndexFieldClickCount());
99          }
100 
101         if (fessConfig.getIndexerFavoriteCountEnabledAsBoolean()) {
102             addFavoriteCountField(dataMap, url, fessConfig.getIndexFieldFavoriteCount());
103         }
104 
105         final Set<String> matchedLabelSet = ComponentUtil.getLabelTypeHelper().getMatchedLabelValueSet(url);
106         if (!matchedLabelSet.isEmpty()) {
107             final Set<String> newLabelSet = new HashSet<>();
108             final String[] oldLabels = DocumentUtil.getValue(dataMap, fessConfig.getIndexFieldLabel(), String[].class);
109             StreamUtil.stream(oldLabels).of(stream -> stream.forEach(newLabelSet::add));
110             matchedLabelSet.stream().forEach(newLabelSet::add);
111             dataMap.put(fessConfig.getIndexFieldLabel(), newLabelSet.toArray(new String[newLabelSet.size()]));
112         }
113 
114         if (!dataMap.containsKey(fessConfig.getIndexFieldDocId())) {
115             dataMap.put(fessConfig.getIndexFieldDocId(), systemHelper.generateDocId(dataMap));
116         }
117 
118         ComponentUtil.getLanguageHelper().updateDocument(dataMap);
119 
120         synchronized (docList) {
121             docList.add(ingest(paramMap, dataMap));
122             final long contentSize = indexingHelper.calculateDocumentSize(dataMap);
123             docList.addContentSize(contentSize);
124             final long processingTime = System.currentTimeMillis() - startTime;
125             docList.addProcessingTime(processingTime);
126             if (logger.isDebugEnabled()) {
127                 logger.debug("Added the document({}, {}ms). The number of a document cache is {}.",
128                         MemoryUtil.byteCountToDisplaySize(contentSize), processingTime, docList.size());
129             }
130 
131             if (docList.getContentSize() >= maxDocumentRequestSize || docList.size() >= maxDocumentCacheSize) {
132                 indexingHelper.sendDocuments(fessEsClient, docList);
133             }
134             executeTime += processingTime;
135         }
136 
137         documentSize.getAndIncrement();
138 
139         if (logger.isDebugEnabled()) {
140             logger.debug("The number of an added document is {}.", documentSize.get());
141         }
142 
143     }
144 
145     protected Map<String, Object> ingest(final Map<String, String> paramMap, final Map<String, Object> dataMap) {
146         if (ingestFactory == null) {
147             return dataMap;
148         }
149         Map<String, Object> target = dataMap;
150         for (final Ingester ingester : ingestFactory.getIngesters()) {
151             try {
152                 target = ingester.process(target, paramMap);
153             } catch (final Exception e) {
154                 logger.warn("Failed to process Ingest[{}]", ingester.getClass().getSimpleName(), e);
155             }
156         }
157         return target;
158     }
159 
160     @Override
161     public void commit() {
162         synchronized (docList) {
163             if (!docList.isEmpty()) {
164                 final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
165                 final FessEsClient fessEsClient = ComponentUtil.getFessEsClient();
166                 indexingHelper.sendDocuments(fessEsClient, docList);
167             }
168         }
169     }
170 
171     protected void addClickCountField(final Map<String, Object> doc, final String url, final String clickCountField) {
172         final SearchLogHelper searchLogHelper = ComponentUtil.getSearchLogHelper();
173         final int count = searchLogHelper.getClickCount(url);
174         doc.put(clickCountField, count);
175         if (logger.isDebugEnabled()) {
176             logger.debug("Click Count: {}, url: {}", count, url);
177         }
178     }
179 
180     protected void addFavoriteCountField(final Map<String, Object> doc, final String url, final String favoriteCountField) {
181         final SearchLogHelper searchLogHelper = ComponentUtil.getSearchLogHelper();
182         final long count = searchLogHelper.getFavoriteCount(url);
183         doc.put(favoriteCountField, count);
184         if (logger.isDebugEnabled()) {
185             logger.debug("Favorite Count: {}, url: {}", count, url);
186         }
187     }
188 
189     @Override
190     public long getDocumentSize() {
191         return documentSize.get();
192     }
193 
194     @Override
195     public long getExecuteTime() {
196         return executeTime;
197     }
198 
199 }