Password-protected PDF
This page is generated by Machine Translation from Japanese.
How to support password-protected PDF
To make password-protected PDF files searchable, you need to register their passwords in a settings file.
Setting
First, create webapps/fess/WEB-INF/classes/s2robot_extractor.dicon. The example below assumes that files named test_~.pdf are protected with the password "pass". If you have multiple files, add multiple addPassword settings.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE components PUBLIC "-//SEASAR//DTD S2Container 2.4//EN"
"http://www.seasar.org/dtd/components24.dtd">
<components>
<!-- Extractor components registered by name: Tika, MS Word, Excel, PowerPoint, Publisher and Visio. -->
<component name="tikaExtractor" class="org.seasar.robot.extractor.impl.TikaExtractor"/>
<component name="msWordExtractor"
class="org.seasar.robot.extractor.impl.MsWordExtractor"/>
<component name="msExcelExtractor"
class="org.seasar.robot.extractor.impl.MsExcelExtractor"/>
<component name="msPowerPointExtractor"
class="org.seasar.robot.extractor.impl.MsPowerPointExtractor"/>
<component name="msPublisherExtractor"
class="org.seasar.robot.extractor.impl.MsPublisherExtractor"/>
<component name="msVisioExtractor"
class="org.seasar.robot.extractor.impl.MsVisioExtractor"/>
<!-- PDF extractor. Each addPassword call takes two arguments: a file-path pattern and a password. -->
<component name="pdfExtractor" class="org.seasar.robot.extractor.impl.PdfExtractor">
<initMethod name="addPassword">
<!-- Regular expression specifying the path of the target files. -->
<!-- NOTE(review): the "." before "pdf" is unescaped, so it matches any character; confirm whether a literal dot is intended. -->
<arg>".*test_.*.pdf"</arg>
<!-- Password -->
<arg>"pass"</arg>
</initMethod>
</component>
<!-- Extractor components for plain text, HTML and XML, declared without any init methods. -->
<component name="textExtractor" class="org.seasar.robot.extractor.impl.TextExtractor"/>
<component name="htmlExtractor" class="org.seasar.robot.extractor.impl.HtmlExtractor"/>
<component name="xmlExtractor" class="org.seasar.robot.extractor.impl.XmlExtractor"/>
<!-- HTML XPath extractor. addFeature is called with the SAX "namespaces" feature URI and the value "false". -->
<component name="htmlXpathExtractor"
class="org.seasar.robot.extractor.impl.HtmlXpathExtractor">
<initMethod name="addFeature">
<arg>"http://xml.org/sax/features/namespaces"</arg>
<arg>"false"</arg>
</initMethod>
</component>
...
Next, edit webapps/fess/WEB-INF/classes/s2robot_rule.dicon as follows.
...
<!-- File-system crawl rule: a "url" rule and a "mimeType" rule are registered, and responses are handled with fessFileTransformer. -->
<component name="fsFileRule" class="org.seasar.robot.rule.impl.RegexRule" >
<property name="ruleId">"fsFileRule"</property>
<property name="responseProcessor">
<component class="org.seasar.robot.processor.impl.DefaultResponseProcessor">
<property name="transformer">fessFileTransformer</property>
</component>
</property>
<!-- presumably requires every added rule to match; verify against RegexRule -->
<property name="allRequired">true</property>
<initMethod name="addRule">
<arg>"url"</arg>
<arg>"file:.*"</arg>
</initMethod>
<initMethod name="addRule">
<arg>"mimeType"</arg>
<!-- Supported MIME types; application/pdf is listed so that PDF files are covered by this rule. -->
<!-- NOTE(review): if this value is evaluated as a regular expression, the "+" in xhtml+xml and rdf+xml is a quantifier rather than a literal plus sign; confirm. -->
<arg>
"(application/xml"
+ "|application/xhtml+xml"
+ "|application/rdf+xml"
+ "|application/pdf"
+ "|text/xml"
+ "|text/xml-external-parsed-entity"
+ "|text/html)"
</arg>
</initMethod>
</component>
...
After making the above settings, start Fess and run the crawl. Basic usage is otherwise unchanged.