Shinsuke Sugaya 10 år sedan
förälder
incheckning
70bd6fa900

+ 8 - 20
src/main/java/jp/sf/fess/ds/impl/IndexUpdateCallbackImpl.java

@@ -25,6 +25,7 @@ import jp.sf.fess.Constants;
 import jp.sf.fess.FessSystemException;
 import jp.sf.fess.FessSystemException;
 import jp.sf.fess.ds.IndexUpdateCallback;
 import jp.sf.fess.ds.IndexUpdateCallback;
 import jp.sf.fess.helper.CrawlingSessionHelper;
 import jp.sf.fess.helper.CrawlingSessionHelper;
+import jp.sf.fess.helper.IndexingHelper;
 import jp.sf.fess.helper.SearchLogHelper;
 import jp.sf.fess.helper.SearchLogHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.util.ComponentUtil;
 import jp.sf.fess.util.ComponentUtil;
@@ -40,7 +41,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
 
 
     protected SolrGroup solrGroup;
     protected SolrGroup solrGroup;
 
 
-    public int maxDocumentCacheSize = 10;
+    public int maxDocumentCacheSize = 5;
 
 
     public boolean clickCountEnabled = true;
     public boolean clickCountEnabled = true;
 
 
@@ -71,6 +72,7 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
             throw new FessSystemException("url is null. dataMap=" + dataMap);
             throw new FessSystemException("url is null. dataMap=" + dataMap);
         }
         }
 
 
+        final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
         final CrawlingSessionHelper crawlingSessionHelper = ComponentUtil
         final CrawlingSessionHelper crawlingSessionHelper = ComponentUtil
                 .getCrawlingSessionHelper();
                 .getCrawlingSessionHelper();
         dataMap.put("id", crawlingSessionHelper.generateId(dataMap));
         dataMap.put("id", crawlingSessionHelper.generateId(dataMap));
@@ -85,13 +87,13 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
         }
 
 
         if (docList.size() >= maxDocumentCacheSize) {
         if (docList.size() >= maxDocumentCacheSize) {
-            sendDocuments();
+            indexingHelper.sendDocuments(solrGroup, docList);
         }
         }
         documentSize.getAndIncrement();
         documentSize.getAndIncrement();
         // commit
         // commit
         if (commitPerCount > 0 && documentSize.get() % commitPerCount == 0) {
         if (commitPerCount > 0 && documentSize.get() % commitPerCount == 0) {
             if (!docList.isEmpty()) {
             if (!docList.isEmpty()) {
-                sendDocuments();
+                indexingHelper.sendDocuments(solrGroup, docList);
             }
             }
             commitDocuments();
             commitDocuments();
         }
         }
@@ -142,7 +144,9 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
     @Override
     @Override
     public void commit() {
     public void commit() {
         if (!docList.isEmpty()) {
         if (!docList.isEmpty()) {
-            sendDocuments();
+            final IndexingHelper indexingHelper = ComponentUtil
+                    .getIndexingHelper();
+            indexingHelper.sendDocuments(solrGroup, docList);
         }
         }
         commitDocuments();
         commitDocuments();
     }
     }
@@ -161,22 +165,6 @@ public class IndexUpdateCallbackImpl implements IndexUpdateCallback {
         }
         }
     }
     }
 
 
-    protected void sendDocuments() {
-        final long execTime = System.currentTimeMillis();
-        if (logger.isInfoEnabled()) {
-            logger.info("Sending " + docList.size() + " document to a server.");
-        }
-        synchronized (solrGroup) {
-            solrGroup.add(docList);
-        }
-        if (logger.isInfoEnabled()) {
-            logger.info("Sent " + docList.size()
-                    + " documents. The execution time is "
-                    + (System.currentTimeMillis() - execTime) + "ms.");
-        }
-        docList.clear();
-    }
-
     protected void addClickCountField(final SolrInputDocument doc,
     protected void addClickCountField(final SolrInputDocument doc,
             final String url) {
             final String url) {
         final SearchLogHelper searchLogHelper = ComponentUtil
         final SearchLogHelper searchLogHelper = ComponentUtil

+ 253 - 0
src/main/java/jp/sf/fess/helper/IndexingHelper.java

@@ -0,0 +1,253 @@
+/*
+ * Copyright 2009-2014 the CodeLibs Project and the Others.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+package jp.sf.fess.helper;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import jp.sf.fess.util.ComponentUtil;
+
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.client.solrj.response.UpdateResponse;
+import org.apache.solr.client.solrj.util.ClientUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.codelibs.solr.lib.SolrGroup;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class IndexingHelper {
+    private static final Logger logger = LoggerFactory
+            .getLogger(IndexingHelper.class);
+
+    public int maxRetryCount = 5;
+
+    public int defaultRowSize = 100;
+
+    public long requestInterval = 500;
+
+    /**
+     * Sends the buffered documents to Solr and then empties the buffer.
+     *
+     * While holding the monitor of {@code solrGroup}, stale index entries for
+     * the documents are removed first and the documents are added afterwards.
+     * The given list is cleared before returning.
+     *
+     * @param solrGroup the Solr group that receives the documents
+     * @param docList the documents to send; cleared by this method
+     */
+    public void sendDocuments(final SolrGroup solrGroup,
+            final List<SolrInputDocument> docList) {
+        final long startTime = System.currentTimeMillis();
+        if (logger.isDebugEnabled()) {
+            final StringBuilder msg = new StringBuilder(64);
+            msg.append("Sending ").append(docList.size());
+            msg.append(" documents to a server.");
+            logger.debug(msg.toString());
+        }
+
+        // Delete-then-add must not interleave with other senders.
+        synchronized (solrGroup) {
+            deleteOldDocuments(solrGroup, docList);
+            solrGroup.add(docList);
+        }
+
+        if (logger.isInfoEnabled()) {
+            final long elapsed = System.currentTimeMillis() - startTime;
+            logger.info("Sent " + docList.size() + " docs (Solr: " + elapsed
+                    + "ms)");
+        }
+        docList.clear();
+    }
+
+    /**
+     * Deletes stale index entries for the documents that are about to be added.
+     *
+     * For every input document that carries an id, a config id and a url, Solr
+     * is queried for documents with the same url under the same config id.
+     * Each hit whose id differs from the input document's id is deleted.
+     * Input documents missing any of the three values are skipped.
+     *
+     * @param solrGroup the Solr group to query and delete from
+     * @param docList the documents that are going to be added
+     */
+    private void deleteOldDocuments(final SolrGroup solrGroup,
+            final List<SolrInputDocument> docList) {
+        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
+
+        final List<String> ids = new ArrayList<String>();
+        final StringBuilder q = new StringBuilder(1000);
+        final StringBuilder fq = new StringBuilder(100);
+        for (final SolrInputDocument inputDoc : docList) {
+            final Object idValue = inputDoc.getFieldValue(systemHelper.idField);
+            final Object configIdValue = inputDoc
+                    .getFieldValue(systemHelper.configIdField);
+            final Object urlValue = inputDoc
+                    .getFieldValue(systemHelper.urlField);
+            // Without a url there is nothing to look up; escaping a null
+            // value would throw a NullPointerException.
+            if (idValue == null || configIdValue == null || urlValue == null) {
+                continue;
+            }
+
+            q.setLength(0);
+            q.append(systemHelper.urlField).append(":\"");
+            q.append(ClientUtils.escapeQueryChars(urlValue.toString()));
+            q.append('"');
+
+            // Escape the config id as well so query syntax characters in it
+            // cannot alter the filter query.
+            fq.setLength(0);
+            fq.append(systemHelper.configIdField).append(':');
+            fq.append(ClientUtils.escapeQueryChars(configIdValue.toString()));
+
+            final SolrDocumentList docs = getSolrDocumentList(solrGroup,
+                    fq.toString(), q.toString(),
+                    new String[] { systemHelper.idField });
+            for (final SolrDocument doc : docs) {
+                final Object oldIdValue = doc
+                        .getFieldValue(systemHelper.idField);
+                if (oldIdValue == null || idValue.equals(oldIdValue)) {
+                    continue;
+                }
+                final String oldId = oldIdValue.toString();
+                // Queue each stale id once; a repeated delete only costs
+                // another round trip.
+                if (!ids.contains(oldId)) {
+                    ids.add(oldId);
+                }
+            }
+            if (logger.isDebugEnabled()) {
+                logger.debug(q + " in " + fq + " => " + docs);
+            }
+        }
+        for (final String id : ids) {
+            deleteDocument(solrGroup, id);
+        }
+    }
+
+    /**
+     * Returns all documents matching the query, starting with a page of
+     * {@code defaultRowSize} rows.
+     *
+     * @param solrGroup the Solr group to query
+     * @param fq the filter query, or {@code null} for none
+     * @param q the main query
+     * @param fields the fields to return, or {@code null} to leave unset
+     * @return the matching documents
+     */
+    public SolrDocumentList getSolrDocumentList(final SolrGroup solrGroup,
+            final String fq, final String q, final String[] fields) {
+        return getSolrDocumentList(solrGroup, fq, q, fields, defaultRowSize);
+    }
+
+    /**
+     * Queries Solr for up to {@code row} documents. If more documents match
+     * than were requested, the query is repeated with the row count raised to
+     * the reported number of matches.
+     *
+     * @param solrGroup the Solr group to query
+     * @param fq the filter query, or {@code null} for none
+     * @param q the main query
+     * @param fields the fields to return, or {@code null} to leave unset
+     * @param row the maximum number of rows to request
+     * @return the matching documents
+     */
+    protected SolrDocumentList getSolrDocumentList(final SolrGroup solrGroup,
+            final String fq, final String q, final String[] fields,
+            final int row) {
+        final SolrQuery sq = new SolrQuery();
+        if (fq != null) {
+            sq.setFilterQueries(fq);
+        }
+        sq.setQuery(q);
+        if (fields != null) {
+            sq.setFields(fields);
+        }
+        sq.setRows(row);
+        final SolrDocumentList docList = solrGroup.query(sq).getResults();
+        final long numFound = docList.getNumFound();
+        // "<=" matters: when numFound equals row the result is already
+        // complete. Re-querying with the same row count would recurse forever.
+        if (numFound <= row || row == Integer.MAX_VALUE) {
+            return docList;
+        }
+        // Clamp instead of casting blindly so a huge count cannot overflow.
+        final int nextRow = (int) Math.min(numFound, Integer.MAX_VALUE);
+        return getSolrDocumentList(solrGroup, fq, q, fields, nextRow);
+    }
+
+    /**
+     * Deletes the document with the given id, retrying on failure.
+     *
+     * Up to {@code maxRetryCount} attempts are made, waiting
+     * {@code requestInterval} milliseconds between attempts. An attempt fails
+     * when any response reports a status other than 200 or when the request
+     * throws. If the thread is interrupted while waiting, the interrupt flag
+     * is restored and no further attempts are made.
+     *
+     * @param solrGroup the Solr group to delete from
+     * @param id the id of the document to delete
+     */
+    public void deleteDocument(final SolrGroup solrGroup, final String id) {
+        final String query = "{!raw f=id}" + id;
+        for (int i = 0; i < maxRetryCount; i++) {
+            boolean done = true;
+            try {
+                for (final UpdateResponse response : solrGroup
+                        .deleteByQuery(query)) {
+                    if (response.getStatus() != 200) {
+                        if (logger.isDebugEnabled()) {
+                            logger.debug("Failed to delete: " + response);
+                        }
+                        done = false;
+                    }
+                }
+            } catch (final Exception e) {
+                logger.warn("Could not delete a document from Solr."
+                        + " It might be busy. " + "Retrying.. id:" + id
+                        + ", cause: " + e.getMessage());
+                done = false;
+            }
+            if (done) {
+                logger.info("Deleted from Solr: " + id);
+                return;
+            }
+            if (i + 1 >= maxRetryCount) {
+                // No attempt follows, so waiting would only delay the caller.
+                break;
+            }
+            try {
+                Thread.sleep(requestInterval);
+            } catch (final InterruptedException e) {
+                // Keep the interruption visible to the caller and stop
+                // retrying instead of silently continuing.
+                Thread.currentThread().interrupt();
+                logger.warn("Interrupted while waiting to retry the deletion."
+                        + " id:" + id);
+                return;
+            }
+        }
+        logger.warn("Gave up deleting a document from Solr. id:" + id);
+    }
+
+    /**
+     * Looks up the single document stored under the given id.
+     *
+     * More than one hit is treated as an invalid index state: an error is
+     * logged, the entries stored under the id are deleted, and {@code null}
+     * is returned.
+     *
+     * @param solrGroup the Solr group to query
+     * @param id the id to look up (matched with the raw query parser)
+     * @param fields the fields to return, or {@code null} to leave unset
+     * @return the document, or {@code null} if there is none or the id was
+     *         not unique
+     */
+    public SolrDocument getSolrDocument(final SolrGroup solrGroup,
+            final String id, final String[] fields) {
+        final SolrQuery solrQuery = new SolrQuery();
+        final StringBuilder queryBuf = new StringBuilder(200);
+        queryBuf.append("{!raw f=id}");
+        queryBuf.append(id);
+        solrQuery.setQuery(queryBuf.toString());
+        if (fields != null) {
+            solrQuery.setFields(fields);
+        }
+        final QueryResponse response = solrGroup.query(solrQuery);
+        final SolrDocumentList docList = response.getResults();
+        if (docList.isEmpty()) {
+            return null;
+        }
+        if (docList.size() > 1) {
+            logger.error("Invalid multiple docs for " + id);
+            // Every hit was matched on this exact id, so a single delete by
+            // id removes all of them. This also works when the caller did not
+            // request the id field.
+            // NOTE(review): assumes the id field is single-valued - confirm.
+            deleteDocument(solrGroup, id);
+            return null;
+        }
+        return docList.get(0);
+    }
+
+    public SolrDocumentList getSolrDocumentListByPrefixId(
+            final SolrGroup solrGroup, final String id, final String[] fields) {
+        final SolrQuery solrQuery = new SolrQuery();
+        final StringBuilder queryBuf = new StringBuilder(200);
+        queryBuf.append("{!prefix f=id}");
+        queryBuf.append(id);
+        solrQuery.setQuery(queryBuf.toString());
+        if (fields != null) {
+            solrQuery.setFields(fields);
+        }
+        final QueryResponse response = solrGroup.query(solrQuery);
+        final SolrDocumentList docList = response.getResults();
+        if (docList.isEmpty()) {
+            return null;
+        }
+        if (logger.isDebugEnabled()) {
+            logger.debug("Found solr documents: " + docList);
+        }
+        return docList;
+    }
+
+    /**
+     * Deletes the documents whose {@code parentId} equals the given id.
+     * Responses with a status other than 200 are only reported at debug
+     * level; no retry is attempted.
+     *
+     * @param solrGroup the Solr group to delete from
+     * @param id the parent id whose children are deleted
+     */
+    public void deleteChildSolrDocument(final SolrGroup solrGroup,
+            final String id) {
+        final StringBuilder queryBuf = new StringBuilder(100);
+        queryBuf.append("{!raw f=parentId v=\"").append(id).append("\"}");
+        for (final UpdateResponse result : solrGroup.deleteByQuery(queryBuf
+                .toString())) {
+            final boolean failed = result.getStatus() != 200;
+            if (failed && logger.isDebugEnabled()) {
+                logger.debug("Failed to delete: " + result);
+            }
+        }
+    }
+
+    /**
+     * Returns all documents whose {@code parentId} equals the given id,
+     * starting with a page of {@code defaultRowSize} rows.
+     *
+     * @param solrGroup the Solr group to query
+     * @param id the parent id
+     * @param fields the fields to return, or {@code null} to leave unset
+     * @return the child documents
+     */
+    public SolrDocumentList getChildSolrDocumentList(final SolrGroup solrGroup,
+            final String id, final String[] fields) {
+        return getChildSolrDocumentList(solrGroup, id, fields, defaultRowSize);
+    }
+
+    /**
+     * Queries Solr for up to {@code row} child documents of the given id. If
+     * more documents match than were requested, the query is repeated with
+     * the row count raised to the reported number of matches.
+     *
+     * @param solrGroup the Solr group to query
+     * @param id the parent id
+     * @param fields the fields to return, or {@code null} to leave unset
+     * @param row the maximum number of rows to request
+     * @return the child documents
+     */
+    protected SolrDocumentList getChildSolrDocumentList(
+            final SolrGroup solrGroup, final String id, final String[] fields,
+            final int row) {
+        final SolrQuery solrQuery = new SolrQuery();
+        solrQuery.setQuery("{!raw f=parentId v=\"" + id + "\"}");
+        if (fields != null) {
+            solrQuery.setFields(fields);
+        }
+        solrQuery.setRows(row);
+        final SolrDocumentList docList = solrGroup.query(solrQuery)
+                .getResults();
+        final long numFound = docList.getNumFound();
+        // "<=" matters: when numFound equals row the result is already
+        // complete. Re-querying with the same row count would recurse forever.
+        if (numFound <= row || row == Integer.MAX_VALUE) {
+            return docList;
+        }
+        // Clamp instead of casting blindly so a huge count cannot overflow.
+        final int nextRow = (int) Math.min(numFound, Integer.MAX_VALUE);
+        return getChildSolrDocumentList(solrGroup, id, fields, nextRow);
+    }
+}

+ 2 - 2
src/main/java/jp/sf/fess/helper/QueryHelper.java

@@ -107,13 +107,13 @@ public class QueryHelper implements Serializable {
     protected String[] responseFields = new String[] { "id", "docId", "score",
     protected String[] responseFields = new String[] { "id", "docId", "score",
             "boost", "contentLength", "host", "site", "lastModified",
             "boost", "contentLength", "host", "site", "lastModified",
             "mimetype", "filetype_s", "created", TITLE_FIELD, "digest", "url",
             "mimetype", "filetype_s", "created", TITLE_FIELD, "digest", "url",
-            "clickCount_l_x_dv", "favoriteCount_l_x_dv", "cid_s_s", "lang_s",
+            "clickCount_l_x_dv", "favoriteCount_l_x_dv", "cid_s", "lang_s",
             "hasCache_s_s" };
             "hasCache_s_s" };
 
 
     protected String[] cacheResponseFields = new String[] { "id", "docId",
     protected String[] cacheResponseFields = new String[] { "id", "docId",
             "score", "boost", "contentLength", "host", "site", "lastModified",
             "score", "boost", "contentLength", "host", "site", "lastModified",
             "mimetype", "filetype_s", "created", TITLE_FIELD, "digest", "url",
             "mimetype", "filetype_s", "created", TITLE_FIELD, "digest", "url",
-            "clickCount_l_x_dv", "favoriteCount_l_x_dv", "cid_s_s", "lang_s",
+            "clickCount_l_x_dv", "favoriteCount_l_x_dv", "cid_s", "lang_s",
             "cache" };
             "cache" };
 
 
     protected String[] responseDocValuesFields = new String[] {
     protected String[] responseDocValuesFields = new String[] {

+ 9 - 1
src/main/java/jp/sf/fess/helper/SystemHelper.java

@@ -122,7 +122,7 @@ public class SystemHelper implements Serializable {
 
 
     public String clickCountField = "clickCount_l_x_dv";
     public String clickCountField = "clickCount_l_x_dv";
 
 
-    public String configIdField = "cid_s_s";
+    public String configIdField = "cid_s";
 
 
     public String expiresField = "expires_dt";
     public String expiresField = "expires_dt";
 
 
@@ -136,6 +136,14 @@ public class SystemHelper implements Serializable {
 
 
     public String hasCacheField = "hasCache_s_s";
     public String hasCacheField = "hasCache_s_s";
 
 
+    public String lastModifiedField = "lastModified";
+
+    public String anchorField = "anchor";
+
+    public String segmentField = "segment";
+
+    public String roleField = "role";
+
     protected String[] supportedLanguages = new String[] { "ar", "bg", "ca",
     protected String[] supportedLanguages = new String[] { "ar", "bg", "ca",
             "da", "de", "el", "en", "es", "eu", "fa", "fi", "fr", "ga", "gl",
             "da", "de", "el", "en", "es", "eu", "fa", "fi", "fr", "ga", "gl",
             "hi", "hu", "hy", "id", "it", "ja", "lv", "ko", "nl", "no", "pt",
             "hi", "hu", "hy", "id", "it", "ja", "lv", "ko", "nl", "no", "pt",

+ 68 - 172
src/main/java/jp/sf/fess/robot/FessS2RobotThread.java

@@ -31,15 +31,13 @@ import jp.sf.fess.Constants;
 import jp.sf.fess.db.exentity.CrawlingConfig;
 import jp.sf.fess.db.exentity.CrawlingConfig;
 import jp.sf.fess.helper.CrawlingConfigHelper;
 import jp.sf.fess.helper.CrawlingConfigHelper;
 import jp.sf.fess.helper.CrawlingSessionHelper;
 import jp.sf.fess.helper.CrawlingSessionHelper;
+import jp.sf.fess.helper.IndexingHelper;
 import jp.sf.fess.helper.SambaHelper;
 import jp.sf.fess.helper.SambaHelper;
 import jp.sf.fess.helper.SearchLogHelper;
 import jp.sf.fess.helper.SearchLogHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.util.ComponentUtil;
 import jp.sf.fess.util.ComponentUtil;
 
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.IOUtils;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrDocumentList;
 import org.codelibs.core.util.DynamicProperties;
 import org.codelibs.core.util.DynamicProperties;
@@ -61,10 +59,6 @@ public class FessS2RobotThread extends S2RobotThread {
     private static final Logger logger = LoggerFactory
     private static final Logger logger = LoggerFactory
             .getLogger(FessS2RobotThread.class);
             .getLogger(FessS2RobotThread.class);
 
 
-    public int maxSolrQueryRetryCount = 5;
-
-    public int childUrlSize = 10000;
-
     @Override
     @Override
     protected boolean isContentUpdated(final S2RobotClient client,
     protected boolean isContentUpdated(final S2RobotClient client,
             final UrlQueue urlQueue) {
             final UrlQueue urlQueue) {
@@ -82,34 +76,35 @@ public class FessS2RobotThread extends S2RobotThread {
                     .getCrawlingSessionHelper();
                     .getCrawlingSessionHelper();
             final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
             final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
             final SambaHelper sambaHelper = ComponentUtil.getSambaHelper();
             final SambaHelper sambaHelper = ComponentUtil.getSambaHelper();
+            final IndexingHelper indexingHelper = ComponentUtil
+                    .getIndexingHelper();
+            final SolrGroupManager solrGroupManager = ComponentUtil
+                    .getSolrGroupManager();
             final boolean useAclAsRole = crawlerProperties.getProperty(
             final boolean useAclAsRole = crawlerProperties.getProperty(
                     Constants.USE_ACL_AS_ROLE, Constants.FALSE).equals(
                     Constants.USE_ACL_AS_ROLE, Constants.FALSE).equals(
                     Constants.TRUE);
                     Constants.TRUE);
-            final String expiresField = systemHelper.expiresField;
 
 
+            final SolrGroup solrGroup = solrGroupManager
+                    .getSolrGroup(QueryType.ADD);
+
+            final String url = urlQueue.getUrl();
             ResponseData responseData = null;
             ResponseData responseData = null;
             try {
             try {
-                //  head method
-                responseData = client
-                        .execute(RequestDataBuilder.newRequestData().head()
-                                .url(urlQueue.getUrl()).build());
-                if (responseData == null) {
-                    return true;
-                }
-
-                SolrDocumentList oldDocWithRoleList = null;
                 final CrawlingConfig crawlingConfig = crawlingConfigHelper
                 final CrawlingConfig crawlingConfig = crawlingConfigHelper
                         .get(robotContext.getSessionId());
                         .get(robotContext.getSessionId());
                 final Map<String, Object> dataMap = new HashMap<String, Object>();
                 final Map<String, Object> dataMap = new HashMap<String, Object>();
-                dataMap.put("url", urlQueue.getUrl());
+                dataMap.put(systemHelper.urlField, url);
                 final List<String> roleTypeList = new ArrayList<String>();
                 final List<String> roleTypeList = new ArrayList<String>();
                 for (final String roleType : crawlingConfig.getRoleTypeValues()) {
                 for (final String roleType : crawlingConfig.getRoleTypeValues()) {
                     roleTypeList.add(roleType);
                     roleTypeList.add(roleType);
                 }
                 }
-                if (useAclAsRole && responseData.getUrl().startsWith("smb://")) {
-                    final String id = crawlingSessionHelper.generateId(dataMap);
-                    oldDocWithRoleList = getSolrDocumentList(id, true,
-                            expiresField);
+                if (useAclAsRole && url.startsWith("smb://")) {
+                    //  head method
+                    responseData = client.execute(RequestDataBuilder
+                            .newRequestData().head().url(url).build());
+                    if (responseData == null) {
+                        return true;
+                    }
 
 
                     final ACE[] aces = (ACE[]) responseData.getMetaDataMap()
                     final ACE[] aces = (ACE[]) responseData.getMetaDataMap()
                             .get(SmbClient.SMB_ACCESS_CONTROL_ENTRIES);
                             .get(SmbClient.SMB_ACCESS_CONTROL_ENTRIES);
@@ -124,36 +119,40 @@ public class FessS2RobotThread extends S2RobotThread {
                         }
                         }
                     }
                     }
                 }
                 }
-                dataMap.put("role", roleTypeList);
+                dataMap.put(systemHelper.roleField, roleTypeList);
                 final String id = crawlingSessionHelper.generateId(dataMap);
                 final String id = crawlingSessionHelper.generateId(dataMap);
 
 
-                final SolrDocumentList solrDocumentList = getSolrDocumentList(
-                        id, false, expiresField);
-                if (solrDocumentList == null) {
-                    deleteSolrDocumentList(oldDocWithRoleList);
-                    storeChildUrlsToQueue(urlQueue, getChildUrlSet(id));
-                    return true;
-                }
-
-                if (solrDocumentList.size() > 1) {
-                    // invalid state
-                    deleteSolrDocumentList(oldDocWithRoleList);
-                    deleteSolrDocumentList(solrDocumentList);
+                final SolrDocument solrDocument = indexingHelper
+                        .getSolrDocument(solrGroup, id, new String[] {
+                                systemHelper.idField,
+                                systemHelper.lastModifiedField,
+                                systemHelper.anchorField,
+                                systemHelper.segmentField,
+                                systemHelper.expiresField,
+                                systemHelper.clickCountField,
+                                systemHelper.favoriteCountField });
+                if (solrDocument == null) {
+                    storeChildUrlsToQueue(urlQueue,
+                            getChildUrlSet(solrGroup, id)); // TODO
                     return true;
                     return true;
                 }
                 }
 
 
-                final SolrDocument solrDocument = solrDocumentList.get(0);
-                final Date expires = (Date) solrDocument.get(expiresField);
+                final Date expires = (Date) solrDocument
+                        .get(systemHelper.expiresField);
                 if (expires != null
                 if (expires != null
                         && expires.getTime() < System.currentTimeMillis()) {
                         && expires.getTime() < System.currentTimeMillis()) {
-                    deleteSolrDocumentList(oldDocWithRoleList);
+                    final Object idValue = solrDocument
+                            .getFieldValue(systemHelper.idField);
+                    if (idValue != null) {
+                        indexingHelper.deleteDocument(solrGroup,
+                                idValue.toString());
+                    }
                     return true;
                     return true;
                 }
                 }
 
 
                 final Date lastModified = (Date) solrDocument
                 final Date lastModified = (Date) solrDocument
-                        .get("lastModified");
+                        .get(systemHelper.lastModifiedField);
                 if (lastModified == null) {
                 if (lastModified == null) {
-                    deleteSolrDocumentList(oldDocWithRoleList);
                     return true;
                     return true;
                 }
                 }
 
 
@@ -162,10 +161,8 @@ public class FessS2RobotThread extends S2RobotThread {
                 if (clickCount != null) {
                 if (clickCount != null) {
                     final SearchLogHelper searchLogHelper = ComponentUtil
                     final SearchLogHelper searchLogHelper = ComponentUtil
                             .getSearchLogHelper();
                             .getSearchLogHelper();
-                    final int count = searchLogHelper.getClickCount(urlQueue
-                            .getUrl());
+                    final int count = searchLogHelper.getClickCount(url);
                     if (count != clickCount.intValue()) {
                     if (count != clickCount.intValue()) {
-                        deleteSolrDocumentList(oldDocWithRoleList);
                         return true;
                         return true;
                     }
                     }
                 }
                 }
@@ -175,23 +172,28 @@ public class FessS2RobotThread extends S2RobotThread {
                 if (favoriteCount != null) {
                 if (favoriteCount != null) {
                     final SearchLogHelper searchLogHelper = ComponentUtil
                     final SearchLogHelper searchLogHelper = ComponentUtil
                             .getSearchLogHelper();
                             .getSearchLogHelper();
-                    final long count = searchLogHelper
-                            .getFavoriteCount(urlQueue.getUrl());
+                    final long count = searchLogHelper.getFavoriteCount(url);
                     if (count != favoriteCount.longValue()) {
                     if (count != favoriteCount.longValue()) {
-                        deleteSolrDocumentList(oldDocWithRoleList);
+                        return true;
+                    }
+                }
+
+                if (responseData == null) {
+                    //  head method
+                    responseData = client.execute(RequestDataBuilder
+                            .newRequestData().head().url(url).build());
+                    if (responseData == null) {
                         return true;
                         return true;
                     }
                     }
                 }
                 }
 
 
                 final int httpStatusCode = responseData.getHttpStatusCode();
                 final int httpStatusCode = responseData.getHttpStatusCode();
                 if (httpStatusCode == 404) {
                 if (httpStatusCode == 404) {
-                    deleteSolrDocument(id);
-                    deleteSolrDocumentList(oldDocWithRoleList);
                     storeChildUrlsToQueue(urlQueue,
                     storeChildUrlsToQueue(urlQueue,
                             getAnchorSet(solrDocument.get("anchor")));
                             getAnchorSet(solrDocument.get("anchor")));
+                    indexingHelper.deleteDocument(solrGroup, id);
                     return false;
                     return false;
                 } else if (responseData.getLastModified() == null) {
                 } else if (responseData.getLastModified() == null) {
-                    deleteSolrDocumentList(oldDocWithRoleList);
                     return true;
                     return true;
                 } else if (responseData.getLastModified().getTime() <= lastModified
                 } else if (responseData.getLastModified().getTime() <= lastModified
                         .getTime() && httpStatusCode == 200) {
                         .getTime() && httpStatusCode == 200) {
@@ -256,134 +258,28 @@ public class FessS2RobotThread extends S2RobotThread {
         return childUrlSet;
         return childUrlSet;
     }
     }
 
 
-    protected SolrDocumentList getSolrDocumentList(final String id,
-            final boolean wildcard, final String expiresField) {
-        final SolrGroupManager solrGroupManager = ComponentUtil
-                .getSolrGroupManager();
+    protected Set<RequestData> getChildUrlSet(final SolrGroup solrGroup,
+            final String id) {
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
         final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
-        final SolrGroup solrGroup = solrGroupManager
-                .getSolrGroup(QueryType.ADD);
-        final SolrQuery solrQuery = new SolrQuery();
-        final StringBuilder queryBuf = new StringBuilder(200);
-        if (wildcard) {
-            queryBuf.append("{!prefix f=id}");
-        } else {
-            queryBuf.append("{!raw f=id}");
-        }
-        queryBuf.append(id);
-        solrQuery.setQuery(queryBuf.toString());
-        solrQuery.setFields("id", "lastModified", "anchor", "segment", "role",
-                expiresField, systemHelper.clickCountField,
-                systemHelper.favoriteCountField);
-        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
-            try {
-                final QueryResponse response = solrGroup.query(solrQuery);
-                final SolrDocumentList docList = response.getResults();
-                if (docList.isEmpty()) {
-                    return null;
-                }
-                if (logger.isDebugEnabled()) {
-                    logger.debug("Found solr documents: " + docList);
-                }
-                return docList;
-            } catch (final Exception e) {
-                logger.info("Could not get a response from Solr."
-                        + " It might be busy. " + "Retrying.. id:" + id
-                        + ", cause: " + e.getMessage());
-            }
-            try {
-                Thread.sleep(500);
-            } catch (final InterruptedException e) {
-            }
+        final IndexingHelper indexingHelper = ComponentUtil.getIndexingHelper();
+        final SolrDocumentList docList = indexingHelper
+                .getChildSolrDocumentList(solrGroup, id,
+                        new String[] { systemHelper.urlField });
+        if (docList.isEmpty()) {
+            return null;
         }
         }
-        return null;
-    }
-
-    protected Set<RequestData> getChildUrlSet(final String id) {
-        final SolrGroupManager solrGroupManager = ComponentUtil
-                .getSolrGroupManager();
-        final SolrGroup solrGroup = solrGroupManager
-                .getSolrGroup(QueryType.ADD);
-        final SolrQuery solrQuery = new SolrQuery();
-        solrQuery.setQuery("{!raw f=parentId v=\"" + id + "\"}");
-        solrQuery.setFields("url");
-        solrQuery.setRows(childUrlSize);
-        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
-            try {
-                final QueryResponse response = solrGroup.query(solrQuery);
-                final SolrDocumentList docList = response.getResults();
-                if (docList.isEmpty()) {
-                    return null;
-                }
-                if (logger.isDebugEnabled()) {
-                    logger.debug("Found solr documents: " + docList);
-                }
-                final Set<RequestData> urlSet = new HashSet<>(docList.size());
-                for (final SolrDocument doc : docList) {
-                    final Object obj = doc.get("url");
-                    if (obj != null) {
-                        urlSet.add(RequestDataBuilder.newRequestData().get()
-                                .url(obj.toString()).build());
-                    }
-                }
-                return urlSet;
-            } catch (final Exception e) {
-                logger.info("Could not get a response from Solr."
-                        + " It might be busy. " + "Retrying.. id:" + id
-                        + ", cause: " + e.getMessage());
-            }
-            try {
-                Thread.sleep(500);
-            } catch (final InterruptedException e) {
-            }
+        if (logger.isDebugEnabled()) {
+            logger.debug("Found solr documents: " + docList);
         }
         }
-        return null;
-    }
-
-    protected void deleteSolrDocument(final String id) {
-        final SolrGroupManager solrGroupManager = ComponentUtil
-                .getSolrGroupManager();
-        final SolrGroup solrGroup = solrGroupManager
-                .getSolrGroup(QueryType.DELETE);
-        final String query = "{!raw f=parentId v=\"" + id + "\"}";
-        for (int i = 0; i < maxSolrQueryRetryCount; i++) {
-            boolean done = true;
-            try {
-                for (final UpdateResponse response : solrGroup
-                        .deleteByQuery(query)) {
-                    if (response.getStatus() != 200) {
-                        if (logger.isDebugEnabled()) {
-                            logger.debug("Failed to delete: " + response);
-                        }
-                        done = false;
-                    }
-                }
-            } catch (final Exception e) {
-                logger.info("Could not delete a document from Solr."
-                        + " It might be busy. " + "Retrying.. id:" + id
-                        + ", cause: " + e.getMessage());
-                done = false;
-            }
-            if (done) {
-                logger.info("Deleted from Solr: " + id);
-                break;
-            }
-            try {
-                Thread.sleep(500);
-            } catch (final InterruptedException e) {
+        final Set<RequestData> urlSet = new HashSet<>(docList.size());
+        for (final SolrDocument doc : docList) {
+            final Object obj = doc.get(systemHelper.urlField);
+            if (obj != null) {
+                urlSet.add(RequestDataBuilder.newRequestData().get()
+                        .url(obj.toString()).build());
             }
             }
         }
         }
+        return urlSet;
     }
     }
 
 
-    protected void deleteSolrDocumentList(
-            final SolrDocumentList solrDocumentList) {
-        if (solrDocumentList != null) {
-            for (final SolrDocument solrDocument : solrDocumentList) {
-                final Object idObj = solrDocument.get("id");
-                if (idObj != null) {
-                    deleteSolrDocument(idObj.toString());
-                }
-            }
-        }
-    }
 }
 }

+ 13 - 80
src/main/java/jp/sf/fess/solr/IndexUpdater.java

@@ -17,7 +17,6 @@
 package jp.sf.fess.solr;
 package jp.sf.fess.solr;
 
 
 import java.util.ArrayList;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashMap;
 import java.util.List;
 import java.util.List;
 import java.util.Map;
 import java.util.Map;
@@ -32,15 +31,11 @@ import jp.sf.fess.db.exbhv.ClickLogBhv;
 import jp.sf.fess.db.exbhv.FavoriteLogBhv;
 import jp.sf.fess.db.exbhv.FavoriteLogBhv;
 import jp.sf.fess.db.exbhv.pmbean.FavoriteUrlCountPmb;
 import jp.sf.fess.db.exbhv.pmbean.FavoriteUrlCountPmb;
 import jp.sf.fess.db.exentity.customize.FavoriteUrlCount;
 import jp.sf.fess.db.exentity.customize.FavoriteUrlCount;
+import jp.sf.fess.helper.IndexingHelper;
 import jp.sf.fess.helper.IntervalControlHelper;
 import jp.sf.fess.helper.IntervalControlHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.helper.SystemHelper;
 import jp.sf.fess.util.ComponentUtil;
 import jp.sf.fess.util.ComponentUtil;
 
 
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.util.ClientUtils;
-import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputDocument;
 import org.codelibs.core.util.StringUtil;
 import org.codelibs.core.util.StringUtil;
 import org.codelibs.robot.S2Robot;
 import org.codelibs.robot.S2Robot;
@@ -95,8 +90,13 @@ public class IndexUpdater extends Thread {
     @Resource
     @Resource
     protected SystemHelper systemHelper;
     protected SystemHelper systemHelper;
 
 
+    @Resource
+    protected IndexingHelper indexingHelper;
+
     public int maxDocumentCacheSize = 5;
     public int maxDocumentCacheSize = 5;
 
 
+    public int maxInvalidDocumentSize = 100;
+
     protected boolean finishCrawling = false;
     protected boolean finishCrawling = false;
 
 
     public long updateInterval = 60000; // 1 min
     public long updateInterval = 60000; // 1 min
@@ -243,7 +243,7 @@ public class IndexUpdater extends Thread {
                     }
                     }
 
 
                     if (!docList.isEmpty()) {
                     if (!docList.isEmpty()) {
-                        sendDocuments(docList);
+                        indexingHelper.sendDocuments(solrGroup, docList);
                     }
                     }
 
 
                     synchronized (finishedSessionIdList) {
                     synchronized (finishedSessionIdList) {
@@ -395,14 +395,14 @@ public class IndexUpdater extends Thread {
                     }
                     }
 
 
                     if (docList.size() >= maxDocumentCacheSize) {
                     if (docList.size() >= maxDocumentCacheSize) {
-                        sendDocuments(docList);
+                        indexingHelper.sendDocuments(solrGroup, docList);
                     }
                     }
                     documentSize++;
                     documentSize++;
                     // commit
                     // commit
                     if (commitPerCount > 0
                     if (commitPerCount > 0
                             && documentSize % commitPerCount == 0) {
                             && documentSize % commitPerCount == 0) {
                         if (!docList.isEmpty()) {
                         if (!docList.isEmpty()) {
-                            sendDocuments(docList);
+                            indexingHelper.sendDocuments(solrGroup, docList);
                         }
                         }
                         commitDocuments();
                         commitDocuments();
                     }
                     }
@@ -563,17 +563,15 @@ public class IndexUpdater extends Thread {
             }
             }
         }
         }
         if (logger.isInfoEnabled()) {
         if (logger.isInfoEnabled()) {
-            logger.info("The number of a crawled document is "
-                    + arList.getAllRecordCount() + ". The processing size is "
-                    + arList.size() + ". The execution time is "
-                    + (System.currentTimeMillis() - execTime) + "ms.");
+            logger.info("Processing " + arList.size() + "/"
+                    + arList.getAllRecordCount() + " docs (DB: "
+                    + (System.currentTimeMillis() - execTime) + "ms)");
         }
         }
         if (arList.getAllRecordCount() > unprocessedDocumentSize) {
         if (arList.getAllRecordCount() > unprocessedDocumentSize) {
             if (logger.isInfoEnabled()) {
             if (logger.isInfoEnabled()) {
                 logger.info("Stopped all crawler threads. " + " You have "
                 logger.info("Stopped all crawler threads. " + " You have "
                         + arList.getAllRecordCount() + " (>"
                         + arList.getAllRecordCount() + " (>"
-                        + unprocessedDocumentSize + ") "
-                        + " unprocessed documents.");
+                        + unprocessedDocumentSize + ") " + " unprocessed docs.");
             }
             }
             final IntervalControlHelper intervalControlHelper = ComponentUtil
             final IntervalControlHelper intervalControlHelper = ComponentUtil
                     .getIntervalControlHelper();
                     .getIntervalControlHelper();
@@ -620,71 +618,6 @@ public class IndexUpdater extends Thread {
         }
         }
     }
     }
 
 
-    @SuppressWarnings("unchecked")
-    private void deleteDocuments(final List<SolrInputDocument> docList) {
-        final List<String> ids = new ArrayList<String>();
-        for (final SolrInputDocument inputDoc : docList) {
-            final Collection<Object> roleList = inputDoc.getFieldValues("role");
-            final StringBuilder query = new StringBuilder();
-            query.append("url:\"");
-            query.append(ClientUtils.escapeQueryChars((String) inputDoc
-                    .getFieldValue("url")));
-            query.append("\"");
-
-            final SolrQuery sq = new SolrQuery();
-            sq.setRows(1);
-            sq.setFields(new String[] { "id", "role" });
-            sq.setQuery(query.toString());
-            final SolrDocumentList docs = solrGroup.query(sq).getResults();
-            if (docs.size() > 0) {
-                for (final SolrDocument doc : docs) {
-                    // checking changed roles
-                    final Collection<Object> docRoleList = doc
-                            .getFieldValues("role");
-
-                    if (CollectionUtils.isEmpty(roleList)
-                            && CollectionUtils.isEmpty(docRoleList)) {
-                        // neither have role
-                        continue;
-                    }
-                    if (CollectionUtils.isNotEmpty(roleList)
-                            && CollectionUtils.isNotEmpty(docRoleList)) {
-                        final List<String> diff = (List<String>) CollectionUtils
-                                .disjunction(roleList, docRoleList);
-                        if (diff.size() == 0) {
-                            // has same role(s)
-                            continue;
-                        }
-                    }
-                    // has different role(s)
-                    ids.add((String) doc.getFieldValue("id"));
-                }
-            }
-        }
-        if (ids.size() > 0) {
-            synchronized (solrGroup) {
-                solrGroup.deleteById(ids);
-            }
-        }
-    }
-
-    private void sendDocuments(final List<SolrInputDocument> docList) {
-        final long execTime = System.currentTimeMillis();
-        if (logger.isInfoEnabled()) {
-            logger.info("Sending " + docList.size() + " document to a server.");
-        }
-        synchronized (solrGroup) {
-            deleteDocuments(docList);
-            solrGroup.add(docList);
-        }
-        if (logger.isInfoEnabled()) {
-            logger.info("Sent " + docList.size()
-                    + " documents. The execution time is "
-                    + (System.currentTimeMillis() - execTime) + "ms.");
-        }
-        docList.clear();
-    }
-
     private void forceStop() {
     private void forceStop() {
         systemHelper.setForceStop(true);
         systemHelper.setForceStop(true);
         for (final S2Robot s2Robot : s2RobotList) {
         for (final S2Robot s2Robot : s2RobotList) {

+ 7 - 0
src/main/java/jp/sf/fess/util/ComponentUtil.java

@@ -25,6 +25,7 @@ import jp.sf.fess.helper.DatabaseHelper;
 import jp.sf.fess.helper.DocumentHelper;
 import jp.sf.fess.helper.DocumentHelper;
 import jp.sf.fess.helper.FileTypeHelper;
 import jp.sf.fess.helper.FileTypeHelper;
 import jp.sf.fess.helper.HotSearchWordHelper;
 import jp.sf.fess.helper.HotSearchWordHelper;
+import jp.sf.fess.helper.IndexingHelper;
 import jp.sf.fess.helper.IntervalControlHelper;
 import jp.sf.fess.helper.IntervalControlHelper;
 import jp.sf.fess.helper.JobHelper;
 import jp.sf.fess.helper.JobHelper;
 import jp.sf.fess.helper.KeyMatchHelper;
 import jp.sf.fess.helper.KeyMatchHelper;
@@ -111,6 +112,8 @@ public final class ComponentUtil {
 
 
     private static final String KEY_MATCH_HELPER = "keyMatchHelper";
     private static final String KEY_MATCH_HELPER = "keyMatchHelper";
 
 
+    private static final String INDEXING_HELPER = "indexingHelper";
+
     private ComponentUtil() {
     private ComponentUtil() {
     }
     }
 
 
@@ -242,4 +245,8 @@ public final class ComponentUtil {
     public static KeyMatchHelper getKeyMatchHelper() {
     public static KeyMatchHelper getKeyMatchHelper() {
         return SingletonS2Container.getComponent(KEY_MATCH_HELPER);
         return SingletonS2Container.getComponent(KEY_MATCH_HELPER);
     }
     }
+
+    public static IndexingHelper getIndexingHelper() {
+        return SingletonS2Container.getComponent(INDEXING_HELPER);
+    }
 }
 }

+ 1 - 1
src/main/resources/app.dicon

@@ -81,7 +81,7 @@
             "boost", "contentLength", "host", "site", "lastModified",
             "boost", "contentLength", "host", "site", "lastModified",
             "mimetype", "filetype_s", "created", "title", "digest", "url",
             "mimetype", "filetype_s", "created", "title", "digest", "url",
             "clickCount_l_x_dv", "favoriteCount_l_x_dv",
             "clickCount_l_x_dv", "favoriteCount_l_x_dv",
-            "cid_s_s", "lang_s", "hasCache_s_s" }</property>
+            "cid_s", "lang_s", "hasCache_s_s" }</property>
 		<property name="responseDocValuesFields">new String[]{
 		<property name="responseDocValuesFields">new String[]{
             "clickCount_l_x_dv", "favoriteCount_l_x_dv"}</property>
             "clickCount_l_x_dv", "favoriteCount_l_x_dv"}</property>
 		<property name="highlightingFields">new String[]{"digest", "cache" }</property>
 		<property name="highlightingFields">new String[]{"digest", "cache" }</property>

+ 2 - 0
src/main/webapp/WEB-INF/cmd/resources/app.dicon

@@ -10,6 +10,8 @@
 
 
 	<include path="s2robot_db.dicon" />
 	<include path="s2robot_db.dicon" />
 
 
+	<component name="indexingHelper" class="jp.sf.fess.helper.IndexingHelper">
+	</component>
 	<component name="labelTypeHelper" class="jp.sf.fess.helper.LabelTypeHelper">
 	<component name="labelTypeHelper" class="jp.sf.fess.helper.LabelTypeHelper">
 	</component>
 	</component>
 	<component name="webFsIndexHelper" class="jp.sf.fess.helper.WebFsIndexHelper">
 	<component name="webFsIndexHelper" class="jp.sf.fess.helper.WebFsIndexHelper">