This commit is contained in:
Shinsuke Sugaya 2015-06-20 19:34:49 +09:00
parent 237180db4b
commit 15bb3ca2e8
5 changed files with 132 additions and 54 deletions

View file

@ -270,7 +270,7 @@ public class DataAction implements Serializable {
final File oFile = tempFile;
try {
final String head = new String(b, Constants.UTF_8);
final String head = new String(b, Constants.UTF_8).replace("\"", "");
if (!head.startsWith("SessionId,")
&& !head.startsWith("SearchWord,")
&& !head.startsWith("SearchId,")) {

View file

@ -33,6 +33,7 @@ import jp.sf.fess.db.exbhv.ClickLogBhv;
import jp.sf.fess.db.exbhv.SearchLogBhv;
import jp.sf.fess.db.exentity.ClickLog;
import jp.sf.fess.db.exentity.SearchLog;
import jp.sf.fess.util.CsvUtil;
import jp.sf.orangesignal.csv.CsvConfig;
import jp.sf.orangesignal.csv.CsvReader;
import jp.sf.orangesignal.csv.CsvWriter;
@ -58,26 +59,32 @@ public class ClickLogService implements Serializable {
protected ClickLogBhv clickLogBhv;
public void importCsv(final Reader reader) {
final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
final SimpleDateFormat sdf = new SimpleDateFormat(
CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
final CsvConfig cfg = new CsvConfig(',', '"', '"');
cfg.setEscapeDisabled(false);
cfg.setQuoteDisabled(false);
@SuppressWarnings("resource")
final CsvReader csvReader = new CsvReader(reader, cfg);
try {
List<String> list;
csvReader.readValues(); // ignore header
while ((list = csvReader.readValues()) != null) {
try {
final SearchLogCB cb = new SearchLogCB();
cb.query().setRequestedTime_Equal(
new Timestamp(sdf.parse(list.get(3)).getTime()));
cb.query().setUserSessionId_Equal(list.get(4));
cb.query().setRequestedTime_Equal(CsvUtil.getAsTimestamp(list, 3, new Timestamp(System.currentTimeMillis())));
cb.query().setUserSessionId_Equal(CsvUtil.get(list, 4));
final SearchLog searchLog = searchLogBhv.selectEntity(cb);
if (searchLog != null) {
final ClickLog entity = new ClickLog();
entity.setId(Long.parseLong(list.get(0)));
final long id = CsvUtil.getAsLong(list, 0, -1);
final Timestamp requestedTime = CsvUtil.getAsTimestamp(list, 2, new Timestamp(0));
if (id == -1 || requestedTime.getTime() == 0) {
log.warn("Invalid id or timestamp for search log: " + list);
continue;
}
entity.setId(id);
entity.setSearchId(searchLog.getId());
entity.setUrl(list.get(1));
entity.setRequestedTime(new Timestamp(sdf.parse(
list.get(2)).getTime()));
entity.setUrl(CsvUtil.get(list, 1, StringUtil.EMPTY));
entity.setRequestedTime(requestedTime);
clickLogBhv.insert(entity);
} else {
log.warn("The search log is not found: " + list);
@ -95,6 +102,7 @@ public class ClickLogService implements Serializable {
final CsvConfig cfg = new CsvConfig(',', '"', '"');
cfg.setEscapeDisabled(false);
cfg.setQuoteDisabled(false);
@SuppressWarnings("resource")
final CsvWriter csvWriter = new CsvWriter(writer, cfg);
final ClickLogCB cb = new ClickLogCB();
cb.setupSelect_SearchLog();

View file

@ -29,18 +29,6 @@ import java.util.Set;
import javax.annotation.Resource;
import jp.sf.fess.FessSystemException;
import jp.sf.fess.crud.service.BsCrawlingSessionService;
import jp.sf.fess.db.cbean.CrawlingSessionCB;
import jp.sf.fess.db.cbean.CrawlingSessionInfoCB;
import jp.sf.fess.db.exbhv.CrawlingSessionInfoBhv;
import jp.sf.fess.db.exentity.CrawlingSession;
import jp.sf.fess.db.exentity.CrawlingSessionInfo;
import jp.sf.fess.pager.CrawlingSessionPager;
import jp.sf.orangesignal.csv.CsvConfig;
import jp.sf.orangesignal.csv.CsvReader;
import jp.sf.orangesignal.csv.CsvWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.codelibs.core.CoreLibConstants;
@ -52,6 +40,19 @@ import org.seasar.dbflute.cbean.coption.LikeSearchOption;
import com.ibm.icu.text.SimpleDateFormat;
import jp.sf.fess.FessSystemException;
import jp.sf.fess.crud.service.BsCrawlingSessionService;
import jp.sf.fess.db.cbean.CrawlingSessionCB;
import jp.sf.fess.db.cbean.CrawlingSessionInfoCB;
import jp.sf.fess.db.exbhv.CrawlingSessionInfoBhv;
import jp.sf.fess.db.exentity.CrawlingSession;
import jp.sf.fess.db.exentity.CrawlingSessionInfo;
import jp.sf.fess.pager.CrawlingSessionPager;
import jp.sf.fess.util.CsvUtil;
import jp.sf.orangesignal.csv.CsvConfig;
import jp.sf.orangesignal.csv.CsvReader;
import jp.sf.orangesignal.csv.CsvWriter;
public class CrawlingSessionService extends BsCrawlingSessionService implements
Serializable {
@ -183,40 +184,45 @@ public class CrawlingSessionService extends BsCrawlingSessionService implements
}
public void importCsv(final Reader reader) {
final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
final SimpleDateFormat sdf = new SimpleDateFormat(
CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
final CsvConfig cfg = new CsvConfig(',', '"', '"');
cfg.setEscapeDisabled(false);
cfg.setQuoteDisabled(false);
@SuppressWarnings("resource")
final CsvReader csvReader = new CsvReader(reader, cfg);
try {
List<String> list;
csvReader.readValues(); // ignore header
while ((list = csvReader.readValues()) != null) {
final String sessionId = CsvUtil.get(list, 0);
if (sessionId == null) {
log.warn("No crawling session log: Line " + csvReader.getLineNumber());
continue;
}
try {
final CrawlingSessionCB cb = new CrawlingSessionCB();
cb.query().setSessionId_Equal(list.get(0));
cb.query().setSessionId_Equal(sessionId);
cb.specify().columnSessionId();
CrawlingSession crawlingSession = crawlingSessionBhv
.selectEntity(cb);
if (crawlingSession == null) {
crawlingSession = new CrawlingSession();
crawlingSession.setSessionId(list.get(0));
crawlingSession.setCreatedTime(new Timestamp(sdf.parse(
list.get(1)).getTime()));
crawlingSession.setSessionId(sessionId);
crawlingSession.setCreatedTime(CsvUtil.getAsTimestamp(list, 1, new Timestamp(System.currentTimeMillis())));
crawlingSessionBhv.insert(crawlingSession);
}
final CrawlingSessionInfo entity = new CrawlingSessionInfo();
entity.setCrawlingSessionId(crawlingSession.getId());
entity.setKey(list.get(2));
entity.setValue(list.get(3));
entity.setCreatedTime(new Timestamp(sdf.parse(list.get(4))
.getTime()));
entity.setKey(CsvUtil.get(list, 2, "Unknown"));
entity.setValue(CsvUtil.get(list, 3,StringUtil.EMPTY));
entity.setCreatedTime(CsvUtil.getAsTimestamp(list, 4, new Timestamp(System.currentTimeMillis())));
crawlingSessionInfoBhv.insert(entity);
} catch (final Exception e) {
log.warn("Failed to read a click log: " + list, e);
log.warn("Failed to read a crawling session info: " + list, e);
}
}
} catch (final IOException e) {
log.warn("Failed to read a click log.", e);
log.warn("Failed to read a crawling session info.", e);
}
}

View file

@ -40,6 +40,7 @@ import jp.sf.fess.db.exbhv.SearchFieldLogBhv;
import jp.sf.fess.db.exentity.SearchFieldLog;
import jp.sf.fess.db.exentity.SearchLog;
import jp.sf.fess.pager.SearchLogPager;
import jp.sf.fess.util.CsvUtil;
import jp.sf.orangesignal.csv.CsvConfig;
import jp.sf.orangesignal.csv.CsvReader;
import jp.sf.orangesignal.csv.CsvWriter;
@ -179,31 +180,39 @@ public class SearchLogService extends BsSearchLogService implements
}
public void importCsv(final Reader reader) {
final CsvReader csvReader = new CsvReader(reader, new CsvConfig());
final SimpleDateFormat sdf = new SimpleDateFormat(
CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
final CsvConfig cfg = new CsvConfig(',', '"', '"');
cfg.setEscapeDisabled(false);
cfg.setQuoteDisabled(false);
@SuppressWarnings("resource")
final CsvReader csvReader = new CsvReader(reader, cfg);
try {
List<String> list;
csvReader.readValues(); // ignore header
while ((list = csvReader.readValues()) != null) {
final String searchWord = CsvUtil.get(list, 0);
if(StringUtil.isBlank(searchWord)){
if(log.isDebugEnabled()){
log.debug("Search Word is empty: "+list);
}
continue;
}
try {
final SearchLog entity = new SearchLog();
entity.setSearchWord(list.get(0));
entity.setSearchQuery(list.get(1));
entity.setSolrQuery(list.get(2));
entity.setRequestedTime(new Timestamp(sdf
.parse(list.get(3)).getTime()));
entity.setResponseTime(Integer.parseInt(list.get(4)));
entity.setHitCount(Long.parseLong(list.get(5)));
entity.setQueryOffset(Integer.parseInt(list.get(6)));
entity.setQueryPageSize(Integer.parseInt(list.get(7)));
entity.setUserAgent(list.get(8));
entity.setReferer(list.get(9));
entity.setClientIp(list.get(10));
entity.setUserSessionId(list.get(11));
entity.setAccessType(list.get(12));
entity.setSearchWord(searchWord);
entity.setSearchQuery(CsvUtil.get(list, 1, "Unknown"));
entity.setSolrQuery(CsvUtil.get(list, 2, "Unknown"));
entity.setRequestedTime(CsvUtil.getAsTimestamp(list, 3, new Timestamp(System.currentTimeMillis())));
entity.setResponseTime(CsvUtil.getAsInt(list, 4, 0));
entity.setHitCount(CsvUtil.getAsLong(list, 5, 0L));
entity.setQueryOffset(CsvUtil.getAsInt(list, 6, 0));
entity.setQueryPageSize(CsvUtil.getAsInt(list, 7, 20));
entity.setUserAgent(CsvUtil.get(list, 8, StringUtil.EMPTY));
entity.setReferer(CsvUtil.get(list, 9, StringUtil.EMPTY));
entity.setClientIp(CsvUtil.get(list, 10, StringUtil.EMPTY));
entity.setUserSessionId(CsvUtil.get(list, 11, StringUtil.EMPTY));
entity.setAccessType(CsvUtil.get(list, 12, StringUtil.EMPTY));
if (list.size() >= 14) {
final String jsonStr = list.get(13);
final String jsonStr = CsvUtil.get(list, 13,StringUtil.EMPTY);
@SuppressWarnings("rawtypes")
final List objList = JSON.decode(jsonStr);
for (final Object obj : objList) {
@ -228,6 +237,7 @@ public class SearchLogService extends BsSearchLogService implements
final CsvConfig cfg = new CsvConfig(',', '"', '"');
cfg.setEscapeDisabled(false);
cfg.setQuoteDisabled(false);
@SuppressWarnings("resource")
final CsvWriter csvWriter = new CsvWriter(writer, cfg);
final SearchLogCB cb = new SearchLogCB();
if (searchLogPager != null) {

View file

@ -0,0 +1,54 @@
package jp.sf.fess.util;
import java.sql.Timestamp;
import java.text.ParseException;
import java.util.List;
import java.util.regex.Pattern;
import org.codelibs.core.CoreLibConstants;
import com.ibm.icu.text.SimpleDateFormat;
/**
 * Static helpers for reading a single cell out of a parsed CSV row.
 *
 * <p>Every accessor takes the row as a {@code List<String>}, a zero-based
 * column index and a default value. The default is returned only when the
 * cell is <em>missing</em> (null row, index outside the row, or a
 * {@code null} element). A cell that is present but blank or malformed is
 * still handed to the parser, so the typed accessors throw for such input.
 */
public final class CsvUtil {

    /** Matches a double quote at the very beginning of a cell value. */
    private static final Pattern LEADING_QUOTE = Pattern.compile("^\"");

    /** Matches a double quote at the end of a cell value. */
    private static final Pattern TRAILING_QUOTE = Pattern.compile("\"$");

    private CsvUtil() {
        // utility class; not instantiable
    }

    /**
     * Returns the cell at column {@code i}, or {@code null} when it is missing.
     *
     * @param list the parsed row; may be {@code null}
     * @param i zero-based column index
     * @return the unquoted cell value, or {@code null} if the cell is missing
     */
    public static String get(final List<String> list, final int i) {
        return get(list, i, null);
    }

    /**
     * Returns the cell at column {@code i} with at most one leading and one
     * trailing double quote removed.
     *
     * @param list the parsed row; may be {@code null}
     * @param i zero-based column index
     * @param defaultValue value to return when the cell is missing
     * @return the unquoted cell value, or {@code defaultValue} when the row is
     *         {@code null}, the index is outside the row, or the element is
     *         {@code null}
     */
    public static String get(final List<String> list, final int i, final String defaultValue) {
        // A negative index is treated like any other out-of-range column
        // instead of letting List.get throw IndexOutOfBoundsException.
        if (list == null || i < 0 || i >= list.size()) {
            return defaultValue;
        }
        final String value = list.get(i);
        if (value == null) {
            return defaultValue;
        }
        // Same regexes as before, compiled once instead of once per cell.
        final String withoutLeadingQuote = LEADING_QUOTE.matcher(value).replaceFirst("");
        return TRAILING_QUOTE.matcher(withoutLeadingQuote).replaceFirst("");
    }

    /**
     * Parses the cell at column {@code i} as a timestamp using the
     * {@code CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND} pattern.
     *
     * @param list the parsed row; may be {@code null}
     * @param i zero-based column index
     * @param defaultValue value to return when the cell is missing
     * @return the parsed timestamp, or {@code defaultValue} when the cell is missing
     * @throws ParseException if the cell is present but cannot be parsed
     */
    public static Timestamp getAsTimestamp(final List<String> list, final int i, final Timestamp defaultValue) throws ParseException {
        final String value = get(list, i);
        if (value == null) {
            return defaultValue;
        }
        // A fresh formatter is created per call, so no formatter instance is shared between calls.
        final SimpleDateFormat sdf = new SimpleDateFormat(CoreLibConstants.DATE_FORMAT_ISO_8601_EXTEND);
        return new Timestamp(sdf.parse(value).getTime());
    }

    /**
     * Parses the cell at column {@code i} as an {@code int}.
     *
     * @param list the parsed row; may be {@code null}
     * @param i zero-based column index
     * @param defaultValue value to return when the cell is missing
     * @return the parsed value, or {@code defaultValue} when the cell is missing
     * @throws NumberFormatException if the cell is present but is not a valid {@code int}
     */
    public static int getAsInt(final List<String> list, final int i, final int defaultValue) {
        final String value = get(list, i);
        if (value == null) {
            return defaultValue;
        }
        return Integer.parseInt(value);
    }

    /**
     * Parses the cell at column {@code i} as a {@code long}.
     *
     * @param list the parsed row; may be {@code null}
     * @param i zero-based column index
     * @param defaultValue value to return when the cell is missing
     * @return the parsed value, or {@code defaultValue} when the cell is missing
     * @throws NumberFormatException if the cell is present but is not a valid {@code long}
     */
    public static long getAsLong(final List<String> list, final int i, final long defaultValue) {
        final String value = get(list, i);
        if (value == null) {
            return defaultValue;
        }
        return Long.parseLong(value);
    }
}