Menu

Password-protected PDF

This page is generated by Machine Translation from Japanese.

How to support password-protected PDF

To make password-protected PDF files searchable, you need to register their passwords in a configuration file.

Setting

First, create webapps/fess/WEB-INF/classes/s2robot_extractor.dicon with the content below. This example assumes that the password "pass" is set on the PDF files whose path matches test_*.pdf. If you have multiple sets of files, add one addPassword setting for each.

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE components PUBLIC "-//SEASAR//DTD S2Container 2.4//EN"
    "http://www.seasar.org/dtd/components24.dtd">
<components>
    <!-- Named extractor components: the Tika extractor followed by the
         Microsoft Office extractors (Word, Excel, PowerPoint, Publisher, Visio). -->
    <component name="tikaExtractor" class="org.seasar.robot.extractor.impl.TikaExtractor"/>
    <component name="msWordExtractor"
        class="org.seasar.robot.extractor.impl.MsWordExtractor"/>
    <component name="msExcelExtractor"
        class="org.seasar.robot.extractor.impl.MsExcelExtractor"/>
    <component name="msPowerPointExtractor"
        class="org.seasar.robot.extractor.impl.MsPowerPointExtractor"/>
    <component name="msPublisherExtractor"
        class="org.seasar.robot.extractor.impl.MsPublisherExtractor"/>
    <component name="msVisioExtractor"
        class="org.seasar.robot.extractor.impl.MsVisioExtractor"/>
    <!-- PDF extractor. Each addPassword call made at initialization registers a
         password for the files whose path matches the given regular expression;
         repeat the initMethod element to cover several sets of files. -->
    <component name="pdfExtractor" class="org.seasar.robot.extractor.impl.PdfExtractor">
        <initMethod name="addPassword">
            <!-- Regular expression selecting the paths of the target files.
                 "[.]" matches a literal dot, so the pattern requires ".pdf"
                 rather than any character followed by "pdf". -->
            <arg>".*test_.*[.]pdf"</arg>
            <!-- Password for the matching files -->
            <arg>"pass"</arg>
        </initMethod>
    </component>
    <!-- Plain-text, HTML and XML extractor components. -->
    <component name="textExtractor" class="org.seasar.robot.extractor.impl.TextExtractor"/>
    <component name="htmlExtractor" class="org.seasar.robot.extractor.impl.HtmlExtractor"/>
    <component name="xmlExtractor" class="org.seasar.robot.extractor.impl.XmlExtractor"/>
    <!-- XPath-based HTML extractor. addFeature is invoked at initialization with
         the SAX "namespaces" feature URI and the value "false". -->
    <component name="htmlXpathExtractor"
        class="org.seasar.robot.extractor.impl.HtmlXpathExtractor">
        <initMethod name="addFeature">
            <arg>"http://xml.org/sax/features/namespaces"</arg>
            <arg>"false"</arg>
        </initMethod>
    </component>

...

Next, edit webapps/fess/WEB-INF/classes/s2robot_rule.dicon as follows.

...
    <!-- Rule "fsFileRule": responses are handled by a DefaultResponseProcessor
         whose transformer is fessFileTransformer. -->
    <component name="fsFileRule" class="org.seasar.robot.rule.impl.RegexRule" >
        <property name="ruleId">"fsFileRule"</property>
        <property name="responseProcessor">
            <component class="org.seasar.robot.processor.impl.DefaultResponseProcessor">
                <property name="transformer">fessFileTransformer</property>
            </component>
        </property>
        <!-- NOTE(review): presumably requires every addRule condition below to
             match; confirm against RegexRule. -->
        <property name="allRequired">true</property>
        <!-- Condition on the "url" field: file: URLs. -->
        <initMethod name="addRule">
            <arg>"url"</arg>
            <arg>"file:.*"</arg>
        </initMethod>
        <initMethod name="addRule">
            <arg>"mimeType"</arg>
            <!-- Supported MIME types, including application/pdf. "[+]" matches a
                 literal plus sign; a bare "+" would act as a regex quantifier and
                 the xhtml+xml / rdf+xml types would never match. -->
            <arg>
  "(application/xml"
+ "|application/xhtml[+]xml"
+ "|application/rdf[+]xml"
+ "|application/pdf"
+ "|text/xml"
+ "|text/xml-external-parsed-entity"
+ "|text/html)"
            </arg>
        </initMethod>
    </component>
...

After applying the settings above, start Fess and run the crawl. Basic usage is otherwise unchanged.