Class FessXpathTransformer

  • All Implemented Interfaces:
    FessTransformer, org.codelibs.fess.crawler.transformer.Transformer

    public class FessXpathTransformer
    extends org.codelibs.fess.crawler.transformer.impl.XpathTransformer
    implements FessTransformer
    • Field Detail

      • prunedContent

        public boolean prunedContent
      • useGoogleOffOn

        protected boolean useGoogleOffOn
    • Constructor Detail

      • FessXpathTransformer

        public FessXpathTransformer()
    • Method Detail

      • init

        @PostConstruct
        public void init()
      • storeData

        protected void storeData(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                 org.codelibs.fess.crawler.entity.ResultData resultData)
        Overrides:
        storeData in class org.codelibs.fess.crawler.transformer.impl.XpathTransformer
      • normalizeData

        protected void normalizeData(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                     Map<String, Object> dataMap)
      • processMetaRobots

        protected void processMetaRobots(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                         org.codelibs.fess.crawler.entity.ResultData resultData,
                                         Document document)
      • processXRobotsTag

        protected void processXRobotsTag(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                         org.codelibs.fess.crawler.entity.ResultData resultData)
      • isValidUrl

        protected boolean isValidUrl(String urlStr)
      • isValidCanonicalUrl

        protected boolean isValidCanonicalUrl(String url,
                                              String canonicalUrl)
      • putAdditionalData

        protected void putAdditionalData(Map<String, Object> dataMap,
                                         org.codelibs.fess.crawler.entity.ResponseData responseData,
                                         Document document)
      • getCanonicalUrl

        protected String getCanonicalUrl(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                         Document document)
      • normalizeCanonicalUrl

        protected String normalizeCanonicalUrl(String baseUrl,
                                               String canonicalUrl)
      • removeCommentTag

        protected String removeCommentTag(String content)
      • getSingleNodeValue

        protected String getSingleNodeValue(Document document,
                                            String xpath,
                                            boolean pruned)
      • parseTextContent

        protected void parseTextContent(Node node,
                                        StringBuilder buf)
      • processGoogleOffOn

        protected Node processGoogleOffOn(Node node,
                                          org.codelibs.core.misc.ValueHolder<Boolean> flag)
      • pruneNode

        protected Node pruneNode(Node node)
      • isPrunedTag

        protected boolean isPrunedTag(Node node)
      • getMultipleNodeValue

        protected String getMultipleNodeValue(Document document,
                                              String xpath)
      • replaceDuplicateHost

        protected String replaceDuplicateHost(String url)
      • getAnchorList

        protected List<String> getAnchorList(Document document,
                                             org.codelibs.fess.crawler.entity.ResponseData responseData)
      • convertChildUrlList

        protected List<org.codelibs.fess.crawler.entity.RequestData> convertChildUrlList(List<org.codelibs.fess.crawler.entity.RequestData> urlList)
        Overrides:
        convertChildUrlList in class org.codelibs.fess.crawler.transformer.impl.HtmlTransformer
      • getData

        public Object getData(org.codelibs.fess.crawler.entity.AccessResultData<?> accessResultData)
        Specified by:
        getData in interface org.codelibs.fess.crawler.transformer.Transformer
        Overrides:
        getData in class org.codelibs.fess.crawler.transformer.impl.XpathTransformer
      • addChildUrlFromTagAttribute

        protected void addChildUrlFromTagAttribute(List<String> urlList,
                                                   URL url,
                                                   String attrValue,
                                                   String encoding)
        Overrides:
        addChildUrlFromTagAttribute in class org.codelibs.fess.crawler.transformer.impl.HtmlTransformer
      • setUseGoogleOffOn

        public void setUseGoogleOffOn(boolean useGoogleOffOn)
      • getThumbnailUrl

        protected String getThumbnailUrl(org.codelibs.fess.crawler.entity.ResponseData responseData,
                                         Document document)
      • addFieldRule

        public void addFieldRule(String name,
                                 String xpath,
                                 boolean isPruned)