View Javadoc
1   /*
2    * Copyright 2012-2020 CodeLibs Project and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package org.codelibs.fess.helper;
17  
18  import java.util.ArrayList;
19  import java.util.Collections;
20  import java.util.List;
21  import java.util.Map;
22  import java.util.concurrent.ConcurrentHashMap;
23  import java.util.concurrent.ExecutionException;
24  import java.util.concurrent.TimeUnit;
25  import java.util.regex.Pattern;
26  
27  import javax.annotation.PostConstruct;
28  
29  import org.apache.logging.log4j.LogManager;
30  import org.apache.logging.log4j.Logger;
31  import org.codelibs.core.lang.StringUtil;
32  import org.codelibs.fess.Constants;
33  import org.codelibs.fess.app.service.DataConfigService;
34  import org.codelibs.fess.app.service.FileConfigService;
35  import org.codelibs.fess.app.service.WebConfigService;
36  import org.codelibs.fess.es.config.exbhv.DataConfigBhv;
37  import org.codelibs.fess.es.config.exbhv.FailureUrlBhv;
38  import org.codelibs.fess.es.config.exbhv.FileConfigBhv;
39  import org.codelibs.fess.es.config.exbhv.WebConfigBhv;
40  import org.codelibs.fess.es.config.exentity.CrawlingConfig;
41  import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigName;
42  import org.codelibs.fess.es.config.exentity.CrawlingConfig.ConfigType;
43  import org.codelibs.fess.es.config.exentity.CrawlingConfig.Param.Config;
44  import org.codelibs.fess.es.config.exentity.DataConfig;
45  import org.codelibs.fess.es.config.exentity.FailureUrl;
46  import org.codelibs.fess.es.config.exentity.FileConfig;
47  import org.codelibs.fess.es.config.exentity.WebConfig;
48  import org.codelibs.fess.mylasta.direction.FessConfig;
49  import org.codelibs.fess.util.ComponentUtil;
50  import org.dbflute.cbean.result.ListResultBean;
51  import org.dbflute.optional.OptionalThing;
52  
53  import com.google.common.cache.Cache;
54  import com.google.common.cache.CacheBuilder;
55  
56  public class CrawlingConfigHelper {
57  
58      private static final Logger logger = LogManager.getLogger(CrawlingConfigHelper.class);
59  
60      protected final Map<String, CrawlingConfig> crawlingConfigMap = new ConcurrentHashMap<>();
61  
62      protected int count = 1;
63  
64      protected Cache<String, CrawlingConfig> crawlingConfigCache;
65  
66      @PostConstruct
67      public void init() {
68          if (logger.isDebugEnabled()) {
69              logger.debug("Initialize {}", this.getClass().getSimpleName());
70          }
71          crawlingConfigCache = CacheBuilder.newBuilder().maximumSize(100).expireAfterWrite(10, TimeUnit.MINUTES).build();
72      }
73  
74      public ConfigType getConfigType(final String configId) {
75          if (configId == null || configId.length() < 2) {
76              return null;
77          }
78          final String configType = configId.substring(0, 1);
79          if (ConfigType.WEB.getTypePrefix().equals(configType)) {
80              return ConfigType.WEB;
81          } else if (ConfigType.FILE.getTypePrefix().equals(configType)) {
82              return ConfigType.FILE;
83          } else if (ConfigType.DATA.getTypePrefix().equals(configType)) {
84              return ConfigType.DATA;
85          }
86          return null;
87      }
88  
89      protected String getId(final String configId) {
90          if (configId == null || configId.length() < 2) {
91              return null;
92          }
93          return configId.substring(1);
94      }
95  
96      public CrawlingConfig getCrawlingConfig(final String configId) {
97          try {
98              return crawlingConfigCache.get(configId, () -> {
99                  final ConfigType configType = getConfigType(configId);
100                 if (configType == null) {
101                     return null;
102                 }
103                 final String id = getId(configId);
104                 if (id == null) {
105                     return null;
106                 }
107                 switch (configType) {
108                 case WEB:
109                     final WebConfigService webConfigService = ComponentUtil.getComponent(WebConfigService.class);
110                     return webConfigService.getWebConfig(id).get();
111                 case FILE:
112                     final FileConfigService fileConfigService = ComponentUtil.getComponent(FileConfigService.class);
113                     return fileConfigService.getFileConfig(id).get();
114                 case DATA:
115                     final DataConfigService dataConfigService = ComponentUtil.getComponent(DataConfigService.class);
116                     return dataConfigService.getDataConfig(id).get();
117                 default:
118                     return null;
119                 }
120             });
121         } catch (final ExecutionException e) {
122             logger.warn("Failed to access a crawling config cache: " + configId, e);
123             return null;
124         }
125     }
126 
127     public OptionalThing<String> getPipeline(final String configId) {
128         final CrawlingConfig config = getCrawlingConfig(configId);
129         if (config == null) {
130             return OptionalThing.empty();
131         }
132         final String pipeline = config.getConfigParameterMap(ConfigName.CONFIG).get(Config.PIPELINE);
133         if (StringUtil.isBlank(pipeline)) {
134             return OptionalThing.empty();
135         }
136         return OptionalThing.of(pipeline);
137     }
138 
139     public void refresh() {
140         crawlingConfigCache.invalidateAll();
141     }
142 
143     public synchronized String store(final String sessionId, final CrawlingConfig crawlingConfig) {
144         final String sessionCountId = sessionId + "-" + count;
145         crawlingConfigMap.put(sessionCountId, crawlingConfig);
146         count++;
147         return sessionCountId;
148     }
149 
150     public void remove(final String sessionId) {
151         crawlingConfigMap.remove(sessionId);
152     }
153 
154     public CrawlingConfig get(final String sessionId) {
155         return crawlingConfigMap.get(sessionId);
156     }
157 
158     public List<WebConfig> getAllWebConfigList() {
159         return getAllWebConfigList(true, true, true, null);
160     }
161 
162     public List<WebConfig> getWebConfigListByIds(final List<String> idList) {
163         if (idList == null) {
164             return getAllWebConfigList();
165         } else {
166             return getAllWebConfigList(true, true, false, idList);
167         }
168     }
169 
170     public List<WebConfig> getAllWebConfigList(final boolean withLabelType, final boolean withRoleType, final boolean available,
171             final List<String> idList) {
172         return ComponentUtil.getComponent(WebConfigBhv.class).selectList(cb -> {
173             if (available) {
174                 cb.query().setAvailable_Equal(Constants.T);
175             }
176             if (idList != null) {
177                 cb.query().setId_InScope(idList);
178             }
179             cb.query().addOrderBy_SortOrder_Asc();
180             cb.query().addOrderBy_Name_Asc();
181             cb.fetchFirst(ComponentUtil.getFessConfig().getPageWebConfigMaxFetchSizeAsInteger());
182         });
183     }
184 
185     public List<FileConfig> getAllFileConfigList() {
186         return getAllFileConfigList(true, true, true, null);
187     }
188 
189     public List<FileConfig> getFileConfigListByIds(final List<String> idList) {
190         if (idList == null) {
191             return getAllFileConfigList();
192         } else {
193             return getAllFileConfigList(true, true, false, idList);
194         }
195     }
196 
197     public List<FileConfig> getAllFileConfigList(final boolean withLabelType, final boolean withRoleType, final boolean available,
198             final List<String> idList) {
199         return ComponentUtil.getComponent(FileConfigBhv.class).selectList(cb -> {
200             if (available) {
201                 cb.query().setAvailable_Equal(Constants.T);
202             }
203             if (idList != null) {
204                 cb.query().setId_InScope(idList);
205             }
206             cb.query().addOrderBy_SortOrder_Asc();
207             cb.query().addOrderBy_Name_Asc();
208             cb.fetchFirst(ComponentUtil.getFessConfig().getPageFileConfigMaxFetchSizeAsInteger());
209         });
210     }
211 
212     public List<DataConfig> getAllDataConfigList() {
213         return getAllDataConfigList(true, true, true, null);
214     }
215 
216     public List<DataConfig> getDataConfigListByIds(final List<String> idList) {
217         if (idList == null) {
218             return getAllDataConfigList();
219         } else {
220             return getAllDataConfigList(true, true, false, idList);
221         }
222     }
223 
224     public List<DataConfig> getAllDataConfigList(final boolean withLabelType, final boolean withRoleType, final boolean available,
225             final List<String> idList) {
226         return ComponentUtil.getComponent(DataConfigBhv.class).selectList(cb -> {
227             if (available) {
228                 cb.query().setAvailable_Equal(Constants.T);
229             }
230             if (idList != null) {
231                 cb.query().setId_InScope(idList);
232             }
233             cb.query().addOrderBy_SortOrder_Asc();
234             cb.query().addOrderBy_Name_Asc();
235             cb.fetchFirst(ComponentUtil.getFessConfig().getPageDataConfigMaxFetchSizeAsInteger());
236         });
237     }
238 
239     public List<String> getExcludedUrlList(final String configId) {
240         final FessConfig fessConfig = ComponentUtil.getFessConfig();
241         final int failureCount = fessConfig.getFailureCountThreshold();
242         final String ignoreFailureType = fessConfig.getIgnoreFailureType();
243 
244         if (failureCount < 0) {
245             return Collections.emptyList();
246         }
247 
248         final int count = failureCount;
249         final ListResultBean<FailureUrl> list = ComponentUtil.getComponent(FailureUrlBhv.class).selectList(cb -> {
250             cb.query().setConfigId_Equal(configId);
251             cb.query().setErrorCount_GreaterEqual(count);
252             cb.fetchFirst(fessConfig.getPageFailureUrlMaxFetchSizeAsInteger());
253         });
254         if (list.isEmpty()) {
255             return Collections.emptyList();
256         }
257 
258         Pattern pattern = null;
259         if (StringUtil.isNotBlank(ignoreFailureType)) {
260             pattern = Pattern.compile(ignoreFailureType);
261         }
262         final List<String> urlList = new ArrayList<>();
263         for (final FailureUrl failureUrl : list) {
264             if (pattern != null) {
265                 if (!pattern.matcher(failureUrl.getErrorName()).matches()) {
266                     urlList.add(failureUrl.getUrl());
267                 }
268             } else {
269                 urlList.add(failureUrl.getUrl());
270             }
271         }
272         return urlList;
273     }
274 }