-
Notifications
You must be signed in to change notification settings - Fork 896
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Web Discovery reporting & double fetching
- Loading branch information
Showing
17 changed files
with
2,083 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
/* Copyright (c) 2024 The Brave Authors. All rights reserved. | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
* You can obtain one at https://mozilla.org/MPL/2.0/. */ | ||
|
||
#include "brave/components/web_discovery/browser/double_fetcher.h" | ||
|
||
#include <utility> | ||
|
||
#include "brave/components/web_discovery/browser/pref_names.h" | ||
#include "brave/components/web_discovery/browser/request_queue.h" | ||
#include "brave/components/web_discovery/browser/util.h" | ||
#include "components/prefs/pref_service.h" | ||
#include "services/network/public/cpp/shared_url_loader_factory.h" | ||
#include "services/network/public/cpp/simple_url_loader.h" | ||
#include "services/network/public/mojom/url_response_head.mojom.h" | ||
|
||
namespace web_discovery { | ||
|
||
namespace { | ||
constexpr char kUrlKey[] = "url"; | ||
constexpr char kAssociatedDataKey[] = "assoc_data"; | ||
|
||
constexpr base::TimeDelta kRequestMaxAge = base::Hours(1); | ||
constexpr base::TimeDelta kMinRequestInterval = | ||
base::Minutes(1) - base::Seconds(5); | ||
constexpr base::TimeDelta kMaxRequestInterval = | ||
base::Minutes(1) + base::Seconds(5); | ||
constexpr size_t kMaxRetries = 3; | ||
constexpr size_t kMaxDoubleFetchResponseSize = 2 * 1024 * 1024; | ||
|
||
constexpr net::NetworkTrafficAnnotationTag kFetchNetworkTrafficAnnotation = | ||
net::DefineNetworkTrafficAnnotation("wdp_doublefetch", R"( | ||
semantics { | ||
sender: "Brave Web Discovery Double Fetch" | ||
description: | ||
"Retrieves a page of interest without cookies for | ||
scraping and reporting via Web Discovery." | ||
trigger: | ||
"Requests are sent minutes after the original | ||
page request is made by the user." | ||
data: "Page data" | ||
destination: WEBSITE | ||
} | ||
policy { | ||
cookies_allowed: NO | ||
setting: | ||
"Users can opt-in or out via brave://settings/search" | ||
})"); | ||
|
||
} // namespace | ||
|
||
// Constructs a fetcher whose pending requests are handed to `request_queue_`,
// configured with the `kScheduledDoubleFetches` pref and the age, interval and
// retry constants defined above.
//
// `profile_prefs` and `shared_url_loader_factory` are stored as non-owning
// pointers. `callback` is stored and run from OnRequestComplete().
DoubleFetcher::DoubleFetcher(
    PrefService* profile_prefs,
    network::SharedURLLoaderFactory* shared_url_loader_factory,
    FetchedCallback callback)
    : profile_prefs_(profile_prefs),
      shared_url_loader_factory_(shared_url_loader_factory),
      request_queue_(profile_prefs,
                     kScheduledDoubleFetches,
                     kRequestMaxAge,
                     kMinRequestInterval,
                     kMaxRequestInterval,
                     kMaxRetries,
                     // NOTE(review): Unretained(this) presumably relies on
                     // `request_queue_` being a member of this object, so it
                     // cannot outlive `this` - confirm the queue never runs
                     // the callback during its own destruction.
                     base::BindRepeating(&DoubleFetcher::OnFetchTimer,
                                         base::Unretained(this))),
      // `callback` is a by-value sink parameter: move it into place instead
      // of making a second copy.
      callback_(std::move(callback)) {}

// Nothing to release by hand; members clean up after themselves.
DoubleFetcher::~DoubleFetcher() = default;
|
||
void DoubleFetcher::ScheduleDoubleFetch(const GURL& url, | ||
base::Value associated_data) { | ||
base::Value::Dict fetch_dict; | ||
fetch_dict.Set(kUrlKey, url.spec()); | ||
fetch_dict.Set(kAssociatedDataKey, std::move(associated_data)); | ||
|
||
request_queue_.ScheduleRequest(base::Value(std::move(fetch_dict))); | ||
} | ||
|
||
// Callback given to the request queue: starts the network fetch for one
// queued request.
//
// `request_data` is expected to be the dictionary built by
// ScheduleDoubleFetch(). If it is not a dictionary, has no URL string, or the
// URL does not parse to a valid GURL, no request is started and the queue is
// told the entry is complete (`true`, the same value a processed response
// reports), so an unusable entry is not fetched.
void DoubleFetcher::OnFetchTimer(const base::Value& request_data) {
  const auto* fetch_dict = request_data.GetIfDict();
  const auto* url_str = fetch_dict ? fetch_dict->FindString(kUrlKey) : nullptr;

  // A default-constructed GURL is invalid, so a missing string and an
  // unparsable string are both rejected by the single check below.
  GURL url = url_str ? GURL(*url_str) : GURL();
  if (!url.is_valid()) {
    request_queue_.NotifyRequestComplete(true);
    return;
  }

  auto resource_request = CreateResourceRequest(url);
  url_loader_ = network::SimpleURLLoader::Create(
      std::move(resource_request), kFetchNetworkTrafficAnnotation);
  // Unretained(this) is paired with `url_loader_` being owned by this object.
  // `url` is not used again in this function, so it is moved into the bound
  // callback rather than copied.
  url_loader_->DownloadToString(
      shared_url_loader_factory_.get(),
      base::BindOnce(&DoubleFetcher::OnRequestComplete, base::Unretained(this),
                     std::move(url)),
      kMaxDoubleFetchResponseSize);
}
|
||
// Completion handler for the download started in OnFetchTimer().
//
// Classifies the response via ProcessCompletedRequest() (which may clear
// `response_body`), reports the outcome to the request queue, and, if the
// queue hands back request data containing an associated-data entry, runs
// `callback_` with the URL, that entry and the (possibly empty) body.
void DoubleFetcher::OnRequestComplete(
    GURL url,
    std::optional<std::string> response_body) {
  const bool result = ProcessCompletedRequest(&response_body);

  auto request_data = request_queue_.NotifyRequestComplete(result);
  if (!request_data) {
    return;
  }

  // GetIfDict() rather than GetDict(): request data that is not a dictionary
  // is skipped instead of tripping a CHECK.
  const auto* request_dict = request_data->GetIfDict();
  const auto* assoc_data =
      request_dict ? request_dict->Find(kAssociatedDataKey) : nullptr;
  if (!assoc_data) {
    return;
  }

  // The callback takes the body by value and this is its last use here, so
  // hand the buffer over instead of copying it.
  callback_.Run(url, *assoc_data, std::move(response_body));
}
|
||
bool DoubleFetcher::ProcessCompletedRequest( | ||
std::optional<std::string>* response_body) { | ||
auto* response_info = url_loader_->ResponseInfo(); | ||
if (!response_body || !response_info) { | ||
return false; | ||
} | ||
auto response_code = response_info->headers->response_code(); | ||
if (response_code < 200 || response_code >= 300) { | ||
if (response_code >= 500) { | ||
// Only retry failures due to server error | ||
return false; | ||
} | ||
*response_body = std::nullopt; | ||
} | ||
return true; | ||
} | ||
|
||
} // namespace web_discovery |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* Copyright (c) 2024 The Brave Authors. All rights reserved. | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this file, | ||
* You can obtain one at https://mozilla.org/MPL/2.0/. */ | ||
|
||
#ifndef BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_DOUBLE_FETCHER_H_ | ||
#define BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_DOUBLE_FETCHER_H_ | ||
|
||
#include <memory> | ||
#include <optional> | ||
#include <string> | ||
|
||
#include "base/memory/raw_ptr.h" | ||
#include "base/values.h" | ||
#include "brave/components/web_discovery/browser/request_queue.h" | ||
#include "url/gurl.h" | ||
|
||
class PrefService; | ||
|
||
namespace network { | ||
class SharedURLLoaderFactory; | ||
class SimpleURLLoader; | ||
} // namespace network | ||
|
||
namespace web_discovery { | ||
|
||
// Makes anonymous requests to relevant page URLs, without involvement of the | ||
// user's session. In the case of search engine result pages, the result of the | ||
// double fetch will be scraped for search engine results for a future | ||
// submission. Uses `RequestQueue` to persist and schedule double fetches. | ||
// Requests will be sent at somewhat random intervals averaging one minute. | ||
class DoubleFetcher { | ||
public: | ||
using FetchedCallback = | ||
base::RepeatingCallback<void(const GURL& url, | ||
const base::Value& associated_data, | ||
std::optional<std::string> response_body)>; | ||
DoubleFetcher(PrefService* profile_prefs, | ||
network::SharedURLLoaderFactory* shared_url_loader_factory, | ||
FetchedCallback callback); | ||
~DoubleFetcher(); | ||
|
||
DoubleFetcher(const DoubleFetcher&) = delete; | ||
DoubleFetcher& operator=(const DoubleFetcher&) = delete; | ||
|
||
// Queues a double fetch for a given URL. The associated data will be stored | ||
// beside the queue request, and will be passed to the `FetchedCallback` | ||
// upon completion. | ||
void ScheduleDoubleFetch(const GURL& url, base::Value associated_data); | ||
|
||
private: | ||
void OnFetchTimer(const base::Value& request_data); | ||
void OnRequestComplete(GURL url, std::optional<std::string> response_body); | ||
bool ProcessCompletedRequest(std::optional<std::string>* response_body); | ||
|
||
raw_ptr<PrefService> profile_prefs_; | ||
raw_ptr<network::SharedURLLoaderFactory> shared_url_loader_factory_; | ||
std::unique_ptr<network::SimpleURLLoader> url_loader_; | ||
|
||
RequestQueue request_queue_; | ||
|
||
FetchedCallback callback_; | ||
}; | ||
|
||
} // namespace web_discovery | ||
|
||
#endif // BRAVE_COMPONENTS_WEB_DISCOVERY_BROWSER_DOUBLE_FETCHER_H_ |
Oops, something went wrong.