View Javadoc
1   /*
2    * Copyright 2012-2020 CodeLibs Project and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package org.codelibs.fess.es.config.exentity;
17  
18  import java.util.Map;
19  import java.util.function.Supplier;
20  
21  import org.apache.http.auth.UsernamePasswordCredentials;
22  import org.codelibs.core.lang.StringUtil;
23  import org.codelibs.fess.crawler.client.CrawlerClientFactory;
24  import org.codelibs.fess.crawler.client.ftp.FtpClient;
25  import org.codelibs.fess.crawler.client.http.HcHttpClient;
26  import org.codelibs.fess.crawler.client.smb.SmbClient;
27  import org.codelibs.fess.mylasta.direction.FessConfig;
28  import org.codelibs.fess.util.ComponentUtil;
29  
30  public interface CrawlingConfig {
31  
32      String getId();
33  
34      String getName();
35  
36      String[] getPermissions();
37  
38      String[] getVirtualHosts();
39  
40      String getDocumentBoost();
41  
42      String getIndexingTarget(String input);
43  
44      String getConfigId();
45  
46      Integer getTimeToLive();
47  
48      CrawlerClientFactory initializeClientFactory(Supplier<CrawlerClientFactory> creator);
49  
50      Map<String, String> getConfigParameterMap(ConfigName name);
51  
52      default void initializeDefaultHttpProxy(final Map<String, Object> paramMap) {
53          final FessConfig fessConfig = ComponentUtil.getFessConfig();
54          final String proxyHost = fessConfig.getHttpProxyHost();
55          final String proxyPort = fessConfig.getHttpProxyPort();
56          if (StringUtil.isNotBlank(proxyHost) && StringUtil.isNotBlank(proxyPort)) {
57              paramMap.put(Param.Client.PROXY_HOST, proxyHost);
58              paramMap.put(Param.Client.PROXY_PORT, proxyPort);
59              final String proxyUsername = fessConfig.getHttpProxyUsername();
60              final String proxyPassword = fessConfig.getHttpProxyPassword();
61              if (proxyUsername != null && proxyPassword != null) {
62                  paramMap.put(HcHttpClient.PROXY_CREDENTIALS_PROPERTY, new UsernamePasswordCredentials(proxyUsername, proxyPassword));
63              }
64  
65          }
66      }
67  
68      public enum ConfigType {
69          WEB("W"), FILE("F"), DATA("D");
70  
71          private final String typePrefix;
72  
73          ConfigType(final String typePrefix) {
74              this.typePrefix = typePrefix;
75          }
76  
77          public String getTypePrefix() {
78              return typePrefix;
79          }
80  
81          String getConfigId(final String id) {
82              if (id == null) {
83                  return null;
84              }
85              return typePrefix + id.toString();
86          }
87      }
88  
89      public enum ConfigName {
90          CLIENT, XPATH, META, VALUE, SCRIPT, FIELD, CONFIG;
91      }
92  
93      public static class Param {
94          // client.*
95          public static class Client {
96              public static final String SMB_AUTHENTICATIONS = SmbClient.SMB_AUTHENTICATIONS_PROPERTY;
97              public static final String SMB1_AUTHENTICATIONS = org.codelibs.fess.crawler.client.smb1.SmbClient.SMB_AUTHENTICATIONS_PROPERTY;
98              public static final String FTP_AUTHENTICATIONS = FtpClient.FTP_AUTHENTICATIONS_PROPERTY;
99              public static final String ROBOTS_TXT_ENABLED = HcHttpClient.ROBOTS_TXT_ENABLED_PROPERTY;
100             public static final String PROXY_PASSWORD = "proxyPassword";
101             public static final String PROXY_USERNAME = "proxyUsername";
102             public static final String PROXY_PORT = HcHttpClient.PROXY_PORT_PROPERTY;
103             public static final String PROXY_HOST = HcHttpClient.PROXY_HOST_PROPERTY;
104             public static final String USER_AGENT = HcHttpClient.USER_AGENT_PROPERTY;
105         }
106 
107         // xpath.*
108         public static class XPath {
109             public static final String DEFAULT_LANG = "default.lang";
110             public static final String DEFAULT_CONTENT = "default.content";
111             public static final String DEFAULT_DIGEST = "default.digest";
112             // xapth.<field>=<value>
113         }
114 
115         // config.*
116         public static class Config {
117             public static final String KEEP_ORIGINAL_BODY = "keep.original.body";
118             public static final String CLEANUP_ALL = "cleanup.all";
119             public static final String CLEANUP_URL_FILTERS = "cleanup.urlFilters";
120             public static final String JCIFS_PREFIX = "jcifs.";
121             public static final String HTML_CANONICAL_XPATH = "html.canonical.xpath";
122             public static final String PIPELINE = "pipeline";
123             public static final String IGNORE_ROBOTS_TAGS = "ignore.robots.tags";
124         }
125 
126         // meta.*
127         // meta.<field>=<value>
128 
129         // value.*
130         // value.<field>=<value>
131 
132         // script.*
133         // script.<field>=<value>
134 
135         // field.*
136         // field.<field>=<value>
137     }
138 }