Skip to content

Commit

Permalink
Merge pull request #333 from min2ha/feature-search-coll
Browse files Browse the repository at this point in the history
1.3.7 - Bugfix release - Search Within Collections
  • Loading branch information
GilHoggarth authored Oct 13, 2021
2 parents d3bb427 + 249cb0e commit 39d1f33
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 66 deletions.
1 change: 0 additions & 1 deletion .github/workflows/push-to-docker-hub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ on:
branches:
- master
- dev

jobs:
push_to_docker_hub:
name: Push Docker image to Docker Hub
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package com.marsspiders.ukwa.controllers;

import static com.marsspiders.ukwa.solr.AccessToEnum.VIEWABLE_ONLY_ON_LIBRARY;
import static com.marsspiders.ukwa.solr.CollectionDocumentType.TYPE_COLLECTION;
import static com.marsspiders.ukwa.solr.CollectionDocumentType.TYPE_TARGET;
import static com.marsspiders.ukwa.solr.SearchByEnum.FULL_TEXT;
import static com.marsspiders.ukwa.util.UrlUtil.getLocale;
import static com.marsspiders.ukwa.util.UrlUtil.getRootPathWithLang;
import static java.util.Collections.singletonList;
Expand All @@ -22,6 +24,8 @@

import javax.servlet.http.HttpServletRequest;

import com.marsspiders.ukwa.solr.SortByEnum;
import com.marsspiders.ukwa.solr.data.ContentInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -109,9 +113,13 @@ public ModelAndView collectionOverviewPage(@PathVariable("collectionId") String
List<TargetWebsiteDTO> targetWebsites = generateTargetWebsitesDTOs(targetWebsitesDocuments, rootPathWithLang, userIpFromBl);
List<CollectionDTO> subCollections = generateSubCollectionDTOs(collectionId, locale);
CollectionDTO currentCollection = generatePlainCollectionDTO(collectionId, locale, targetWebsitesSearchResult);
Map<String, String> breadcrumbPath = buildCollectionBreadcrumbPath(currentCollection);

if (currentCollection == null)
return new ModelAndView("errorCollNotFound");

Map<String, String> breadcrumbPath = buildCollectionBreadcrumbPath(currentCollection);
ModelAndView mav = new ModelAndView("coll");
mav.addObject("checkCount", searchAnyFullTextIndex(currentCollection.getName()));
mav.addObject("userIpFromBl", userIpFromBl);
mav.addObject("breadcrumbPath", breadcrumbPath);
mav.addObject("targetWebsites", targetWebsites);
Expand All @@ -125,7 +133,6 @@ public ModelAndView collectionOverviewPage(@PathVariable("collectionId") String
if (targetPageNumber > totalPages)
targetPageNumber = totalPages;
mav.addObject("targetPageNumber", targetPageNumber);

return mav;
}

Expand All @@ -139,16 +146,13 @@ private Map<String, String> buildCollectionBreadcrumbPath(CollectionDTO currentC
.fetchCollectionById(parentCollectionId)
.getResponseBody().getDocuments()
.get(0);

//Create a new map to get reversed map in result
Map<String, String> oldPath = new LinkedHashMap<>(path);
path.clear();
path.put(parentCollection.getId(), parentCollection.getName());
path.putAll(oldPath);

parentCollectionId = parentCollection.getParentId();
}

return path;
}

Expand Down Expand Up @@ -178,17 +182,13 @@ private List<CollectionDTO> generateRootCollectionDTOs(Locale locale) {
.stream()
.forEach(subCollection -> {
String parentId = subCollection.getParentId();

CollectionDTO parentCollectionDto = rootCollections.get(parentId);
if (parentCollectionDto.getSubCollections() == null) {
parentCollectionDto.setSubCollections(new ArrayList<>());
}

CollectionDTO subCollectionDTO = toCollectionDTO(subCollection, true, locale);
parentCollectionDto.getSubCollections().add(subCollectionDTO);

});

ArrayList<CollectionDTO> sortedCollectionDTOs = new ArrayList<>(rootCollections.values());
Collections.sort(sortedCollectionDTOs, (c1, c2) -> c1.getName().compareTo(c2.getName()));

Expand All @@ -198,15 +198,17 @@ private List<CollectionDTO> generateRootCollectionDTOs(Locale locale) {
/**
 * Builds the DTO for a single collection (sub-collections are not loaded).
 *
 * <p>The collection is looked up with {@code searchService.fetchCollectionById}; the first
 * returned document is converted via {@code toCollectionDTO(document, false, locale)} and its
 * website count is set from the {@code numFound} of {@code targetWebsitesSearchResult}.
 *
 * @param collectionId id of the collection to look up
 * @param locale locale handed through to {@code toCollectionDTO}
 * @param targetWebsitesSearchResult search result whose {@code numFound} becomes the website count
 * @return the populated DTO, or {@code null} when the lookup returns no documents
 */
private CollectionDTO generatePlainCollectionDTO(String collectionId,
                                                 Locale locale,
                                                 SolrSearchResult<CollectionInfo> targetWebsitesSearchResult) {
    SolrSearchResult<CollectionInfo> solrSearchResult = searchService.fetchCollectionById(collectionId);

    // An unknown collection id produces an empty document list. Calling get(0) on it would throw
    // IndexOutOfBoundsException, so signal "not found" with null and let the caller decide.
    if (solrSearchResult.getResponseBody().getDocuments().isEmpty()) {
        return null;
    }

    CollectionDTO collectionDTO = toCollectionDTO(
            solrSearchResult.getResponseBody().getDocuments().get(0), false, locale);
    collectionDTO.setWebsitesNum(targetWebsitesSearchResult.getResponseBody().getNumFound());
    return collectionDTO;
}

private List<TargetWebsiteDTO> generateTargetWebsitesDTOs(List<CollectionInfo> websites,
Expand Down Expand Up @@ -280,4 +282,32 @@ private static boolean readRoomOnlyAccess(CollectionInfo websiteInfo) {
return websiteInfo.getLicenses() == null || websiteInfo.getLicenses().size() == 0;
}


/**
 * Counts the full-text index entries reported for a collection.
 *
 * <p>Delegates to {@code searchService.searchContent} with {@code FULL_TEXT}, zero rows, a start
 * offset of zero, no facet/date filters and the trailing boolean flag set to {@code false}, then
 * returns the {@code numFound} of the response body.
 *
 * @param collectionName name of the collection to check; when {@code null}, an empty query
 *                       string is sent and the collection list holds a single {@code null} entry
 * @return the {@code numFound} value of the search response
 */
public Long searchAnyFullTextIndex(String collectionName) {
    final String queryText = (collectionName == null) ? "" : collectionName;

    final List<String> collectionFilter = new ArrayList<>();
    collectionFilter.add(collectionName);

    return searchService
            .searchContent(
                    FULL_TEXT,
                    queryText,
                    0,
                    SortByEnum.NEWEST_TO_OLDEST,
                    VIEWABLE_ONLY_ON_LIBRARY,
                    0,
                    null, null, null,
                    null, null, null,
                    collectionFilter,
                    false)
            .getResponseBody()
            .getNumFound();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ public ModelAndView searchPage(@RequestParam(value = "search_location", required
int startRowToSend = startFromRow <= solrSearchResultsLimit ? startFromRow : 1;
SolrSearchResult<ContentInfo> archivedSites = searchService.searchContent(searchBy, text, rowsPerPage,
sortBy, accessTo, startRowToSend, originalContentTypes, originalPublicSuffixes, originalDomains,
fromDate, toDate, originalRangeDates, originalCollections);
fromDate, toDate, originalRangeDates, originalCollections, true);


searchResultDTOs.addAll(toSearchResults(archivedSites, request, searchBy, userIpFromBl));
Expand Down Expand Up @@ -339,4 +339,5 @@ private static String toOriginalDomain(String url) {
return domain;
}


}
75 changes: 48 additions & 27 deletions src/main/java/com/marsspiders/ukwa/solr/SolrSearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,33 +141,55 @@ public SolrSearchResult<ContentInfo> searchContent(SearchByEnum searchLocation,
Date fromDatePicked,
Date toDatePicked,
List<String> rangeDates,
List<String> collections) {
List<String> collections,
boolean preProcessQueryString) {

log.debug("Searching content for '" + queryString + "' by " + searchLocation);
SortClause sort = sortBy == null ? null : (sortBy.getWebRequestOrderValue()=="relevant" ? new SortClause("score", sortBy.getSolrOrderValue()) : new SortClause(FIELD_CRAWL_DATE, sortBy.getSolrOrderValue()));
String dateQuery = generateDateQuery(fromDatePicked, toDatePicked, rangeDates);
String accessToQuery = generateAccessToQuery(accessTo);
String contentTypeQuery = generateMultipleConditionsQuery(contentTypes, FIELD_TYPE);
String collectionsQuery = generateMultipleConditionsQuery(collections, FIELD_COLLECTION);
String publicSuffixesQuery = generateMultipleConditionsQuery(publicSuffixes, FIELD_PUBLIC_SUFFIX);
String domainsQuery = generateMultipleConditionsQuery(originalDomains, FIELD_DOMAIN);

List<String> filters = new ArrayList<>();
filters.add(dateQuery);
filters.add(accessToQuery);
filters.add(contentTypeQuery);
filters.add(publicSuffixesQuery);
filters.add(domainsQuery);
filters.add(collectionsQuery);

String[] facets = { FIELD_PUBLIC_SUFFIX, FIELD_TYPE, FIELD_DOMAIN,
FIELD_COLLECTION, FIELD_ACCESS_TERMS };

//Remove:
// - URL prefixes
// - symbols: & | * ( ) ? : ! , ~ { } ^ /   (note: + - [ ] " \ are NOT stripped by the pattern below)
Pattern p = Pattern.compile("(http[s]?://www\\.|http[s]?://|www\\.|[&|*()?:!,~{}^/]+)");

return sendRequest(p.matcher(queryString).replaceAll(""), sort, filters, FIELD_CONTENT, ContentInfo.class, start, rows, facets);
if (preProcessQueryString){
SortClause sort = sortBy == null ? null : (sortBy.getWebRequestOrderValue()=="relevant" ? new SortClause("score", sortBy.getSolrOrderValue()) : new SortClause(FIELD_CRAWL_DATE, sortBy.getSolrOrderValue()));
String dateQuery = generateDateQuery(fromDatePicked, toDatePicked, rangeDates);
String accessToQuery = generateAccessToQuery(accessTo);
String contentTypeQuery = generateMultipleConditionsQuery(contentTypes, FIELD_TYPE);
String collectionsQuery = generateMultipleConditionsQuery(collections, FIELD_COLLECTION);
String publicSuffixesQuery = generateMultipleConditionsQuery(publicSuffixes, FIELD_PUBLIC_SUFFIX);
String domainsQuery = generateMultipleConditionsQuery(originalDomains, FIELD_DOMAIN);

List<String> filters = new ArrayList<>();
filters.add(dateQuery);
filters.add(accessToQuery);
filters.add(contentTypeQuery);
filters.add(publicSuffixesQuery);
filters.add(domainsQuery);
filters.add(collectionsQuery);

String[] facets = { FIELD_PUBLIC_SUFFIX, FIELD_TYPE, FIELD_DOMAIN,
FIELD_COLLECTION, FIELD_ACCESS_TERMS };


//Remove:
// - URL prefixes
// - symbols: & | * ( ) ? : ! , ~ { } ^ /   (note: + - [ ] " \ are NOT stripped by the pattern below)
Pattern p = Pattern.compile("(http[s]?://www\\.|http[s]?://|www\\.|[&|*()?:!,~{}^/]+)");
return sendRequest(p.matcher(queryString).replaceAll(""), sort, filters, FIELD_CONTENT, ContentInfo.class, start, rows, facets);
}
else
//return sendRequestEmpty(queryString, sort, filters, FIELD_CONTENT, ContentInfo.class, start, rows, facets);
return sendRequestCheckCollection(ContentInfo.class, queryString);
}

/**
 * Queries the index for all documents that belong to the given collection.
 *
 * <p>The main query is {@code *:*}; the collection is applied as an {@code fq} filter on the
 * {@code collection} field, matched as a quoted phrase.
 *
 * @param bodyDocsType class the response documents are bound to
 * @param collectionName collection name to filter on
 * @param <T> type of the response documents
 * @return the result returned by {@code communicator.sendRequest} for the built query
 */
private <T extends BodyDocsType> SolrSearchResult<T> sendRequestCheckCollection(Class<T> bodyDocsType, String collectionName) {
    // Within a quoted phrase only the backslash and the double quote are special. Escape them so
    // a name containing either cannot terminate the phrase early or corrupt the filter query.
    // Backslashes must be escaped first, otherwise the escapes added for quotes get doubled.
    String escapedName = String.valueOf(collectionName)
            .replace("\\", "\\\\")
            .replace("\"", "\\\"");

    SolrQuery query = new SolrQuery();
    query.setQuery("*:*"); // match everything; the restriction comes from the filter query below
    // NOTE(review): other queries in this class filter on FIELD_COLLECTION - confirm that constant
    // names the same field as the literal "collection" used here.
    query.setParam("fq", "collection: " + "\"" + escapedName + "\"");
    return communicator.sendRequest(bodyDocsType, query);
}

private <T extends BodyDocsType> SolrSearchResult<T> sendRequest(String queryString,
Expand Down Expand Up @@ -232,7 +254,6 @@ private <T extends BodyDocsType> SolrSearchResult<T> sendRequest(String queryStr
}
}
}

return communicator.sendRequest(bodyDocsType, query);
}
}
28 changes: 9 additions & 19 deletions src/main/java/com/marsspiders/ukwa/util/SolrSearchUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ public static String generateDateQuery(Date fromDatePicked, Date toDatePicked, L
if(fromDatePicked != null || toDatePicked != null){
return SolrSearchService.FIELD_CRAWL_DATE + ":[" + fromDateText + " TO " + toDateText + "]";
}

String dateQuery = "";
for (String originalRangeDate : rangeDates) {
dateQuery += dateQuery.length() > 0 ? OR_JOINER : EXCLUDE_MARKER_SECOND_LAYER_TAG;
if (rangeDates != null)
for (String originalRangeDate : rangeDates) {
dateQuery += dateQuery.length() > 0 ? OR_JOINER : EXCLUDE_MARKER_SECOND_LAYER_TAG;

int yearWhenArchived = Integer.parseInt(originalRangeDate);
String fromDate = yearWhenArchived + DATE_PART_AFTER_YEAR;
String toDate = (yearWhenArchived + 1) + DATE_PART_AFTER_YEAR;
int yearWhenArchived = Integer.parseInt(originalRangeDate);
String fromDate = yearWhenArchived + DATE_PART_AFTER_YEAR;
String toDate = (yearWhenArchived + 1) + DATE_PART_AFTER_YEAR;

dateQuery += SolrSearchService.FIELD_CRAWL_DATE + ":[" + fromDate + " TO " + toDate + "]";
}
dateQuery += SolrSearchService.FIELD_CRAWL_DATE + ":[" + fromDate + " TO " + toDate + "]";
}

return dateQuery;
}
Expand All @@ -45,29 +45,23 @@ public static String generateAccessToQuery(AccessToEnum accessTo) {
if(accessTo == null){
accessTo = VIEWABLE_ANYWHERE;
}

String[] accessToFilters = accessTo.getSolrRequestAccessRestriction().split(",");

// If we're not filtering by one value, given there's only two possible
// values, do no filtering at all:
if (accessToFilters.length > 1) {
return "";
}

String multipleConditionQuery = toMultipleConditionsQuery(
Arrays.asList(accessToFilters), FIELD_ACCESS_TERMS);

return EXCLUDE_MARKER_FIRST_LAYER_TAG + multipleConditionQuery;
}

public static String generateMultipleConditionsQuery(List<String> conditions, String fieldName) {
String multipleConditionQueryWithExclude = "";

if(conditions.size() > 0){
if(conditions != null && conditions.size() > 0){
String multipleConditionsQuery = toMultipleConditionsQuery(conditions, fieldName);
multipleConditionQueryWithExclude = multipleConditionsQuery;
}

return multipleConditionQueryWithExclude;
}

Expand All @@ -89,11 +83,9 @@ private static String toMultipleConditionsQuery(List<String> values, String fiel
}
sb.append(fieldName).append(":").append("\"").append(valueToInclude).append("\"");
}

if(sb.length() != 0){
sb.append(")");
}

return sb.toString();
}

Expand All @@ -108,12 +100,10 @@ private static String toMultipleConditionsQueryWithPreCondition(List<String> val
}
sb.append(fieldName).append(":").append(valueToInclude);
}

if(sb.length() != 0){
sb.append(")");
sb.insert(0, AND_JOINER);
}

return sb.toString();
}
}
2 changes: 2 additions & 0 deletions src/main/resources/i18n/messages.properties
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ cookies.main.heading = Cookie Policy
cookies.text = <h2>What are cookies?</h2><p>Cookies are small text files that websites save to your computer. They often include a randomly generated number which is stored on your device. Cookies are commonly used to improve browsing experience, measure website performance and support the delivery of services. Unless you have adjusted your browser settings to refuse cookies, our systems will issue cookies as soon as you visit our website.</p><h2>How do we use cookies?</h2><p>Our website uses cookies to make the website easier for you to use and improve your overall experience when accessing the UK Web Archive.</p><p>The UK Web Archive currently creates 4 cookies:</p><p>JSESSIONID - this is created at the start of every session, when you open your browser and navigate to our website. Session cookies enable the website to keep track of your movement from page to page so you don't get asked for the same information you've already given to the site. These cookies allow you to be recognized within the website so any page changes or item or data selection you do is remembered from page to page. Your web browser will normally delete session cookies as it is closed down.</p><p>collections_display - this cookie allows our website to remember how you want the Special collections to be displayed - as thumbnails or as a list. It is created the first time you navigate to the Topics and Themes page.</p><p>cookies_accepted - this is used to identify if you viewed and accepted the cookie acknowledgement message. It is created once you confirmed you saw the message by clicking the "OK" button. The cookie acknowledgement message is displayed only once for each user.</p><p>survey_viewed - this indicates that you viewed a user survey that we have temporarily put onto the website. </p><p>Measuring website performance: our website must demonstrate value for money in the delivery of information. 
Monitoring the use of our website via cookies helps our team to work out which pages are most useful to our users, and to improve pages that they aren't finding useful. We also use cookies in order to monitor visitor numbers. No personal information other than your IP address is collected as part of this process.</p><p>To measure website performance we use Google Analytics. This service will create cookies the usage of which is explained here: <a href=https://developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage>https://developers.google.com/analytics/devguides/collection/analyticsjs/cookie-usage</a></p><h2>Where can I find more about cookies and how to turn-off or delete them?</h2><p>Find out more about cookies at www.aboutcookies.org including information on how to control and delete them in all major web browsers.</p><p>You can block cookies by activating the setting on your browser that allows you to refuse all or some cookies. However, if you use your browser settings to block all cookies (including essential cookies) you may not be able to access parts of our sites, or you may experience reduced functionality when accessing certain Services. Unless you have adjusted your browser setting so that it will refuse cookies, our system will issue cookies as soon as you visit our website.</p><p>For information about deleting cookies from the browser on your mobile phone, refer to your handset manual.</p>
error.404.title = UKWA 404 error - Page not found
error.404.back.button = Go back
error.CollNotFound.heading = Error - Collection cannot be found
error.CollNotFound.text = Please check the Collection ID
error.404.heading = 404 error - Page cannot be found
error.404.text = Please check the URL
error.404.note = Please <a href=contact>contact</a> us with the details if you are unable to find what you are looking for.
Expand Down
Loading

0 comments on commit 39d1f33

Please sign in to comment.