Skip to content

Commit

Permalink
switch to search engine
Browse files Browse the repository at this point in the history
  • Loading branch information
Administrator committed Jul 6, 2024
1 parent 0625f8f commit a78c7c2
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 37 deletions.
2 changes: 1 addition & 1 deletion lens-gpt-backend/lens_gpt_backend/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def process_async(file_hash: str, init: Callable[[Producer], None]) -> None:

def _processing_hierarchy(file_hash: str) -> Producer:

lens_links = LensLinksProducer(file_hash)
lens_links = LensLinksProducer(file_hash, add_queue=False)
model_producer = ModelProducerProducer(file_hash)
model_information_producer = ModelInformationProducer(file_hash)
price_producer = PriceProducer(file_hash)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
class ModelInformationProducer(Producer):

def _produce(self, input_value: Product) -> tuple[Product, bool]:
producer_url = input_value.get_str()
producer_url = input_value.get_dict_str_str()["link"]
scape_function = partial(_get_product_info, producer_url)
result = driver_pool.execute(scape_function)
return result, True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"メルカリ\nPatagonia Men's P-6 Logo Long-Sleeve Responsibili-Tee | "
"Patagonia long sleeve, Tees, Patagonia")

EXAMPLE_ANSWERS = "producer: Patagonia\nmodel: Patagonia Men's Long-Sleeved P-6 Logo Responsibili-Tee"
EXAMPLE_ANSWERS = "producer: Patagonia\nmodel: Men's Long-Sleeved P-6 Logo Responsibili-Tee"


class ModelProducerProducer(Producer):
Expand Down
2 changes: 2 additions & 0 deletions lens-gpt-backend/lens_gpt_backend/producer/producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ def __init__(self, file_hash: str, add_queue: bool = True) -> None:

if add_queue:
self._result_queue = ResultQueue.factory(file_hash)
else:
self._result_queue = None

print(f"Producer[{file_hash}]: created")

Expand Down
41 changes: 12 additions & 29 deletions lens-gpt-backend/lens_gpt_backend/producer/producer_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from lens_gpt_backend.producer.producer import Producer
from lens_gpt_backend.utils.chat_gpt import ask_chat_gpt
from lens_gpt_backend.utils.driver_pool import driver_pool
from lens_gpt_backend.utils.google_search import google_search
from lens_gpt_backend.utils.product import Product
from lens_gpt_backend.utils.utils import distinct

Expand All @@ -30,36 +31,18 @@
class ProducerWebsite(Producer):

def _produce(self, input_value: Product) -> tuple[Product, bool]:
base_url = "https://google.com/"
search_dict = input_value.get_dict_str_str()
search_term = f"{search_dict['producer']} {search_dict['model']}"
scape_function = partial(_get_urls_for_image, search_term)
result = driver_pool.execute(scape_function, base_url)
return result, True


def build_google_search_url(query: str) -> str:
base_url = "https://www.google.com/search?q="
encoded_query = quote_plus(query)
return base_url + encoded_query

search_dict = input_value.get_dict_str_str()
search = search_dict["producer"] + " " + search_dict["model"]
search_results = google_search(search)

def _get_urls_for_image(search: str, driver: WebDriver, wait: WebDriverWait[WebDriver]) -> Product:
search_format = build_google_search_url(search)
driver.get(search_format)

# Get all links from the search by getting all a tags from the center column with id center_col
links = driver.find_elements(By.CSS_SELECTOR, "#res a")
# Quick and dirty, filter all out which are not very wide, as they are probably ads
links = [link for link in links if link.size["width"] > 250]
urls = [link.get_attribute("href") for link in links]
non_google_urls = distinct([url for url in urls if url and "google.com" not in url])
enumerate_urls = [f"{i + 1}. {url}" for i, url in enumerate(non_google_urls)]
input_urls = "\n".join(enumerate_urls[:7])
prompt = f"Product: {search}\n{input_urls}"
response = ask_chat_gpt(ASSISTANT_INSTR, [EXAMPLE_TITLES, EXAMPLE_ANSWERS, prompt])
input_urls = [f"{i + 1}. {result['link']}" for i, result in enumerate(search_results)]
prompt = f"Product: {search}\n{input_urls}"

if response:
return Product(non_google_urls[int(response) - 1], data_description="url")
for i in range(3):
response = ask_chat_gpt(ASSISTANT_INSTR, [EXAMPLE_TITLES, EXAMPLE_ANSWERS, prompt])
if response.isnumeric() and 1 <= int(response) <= 7:
return Product(search_results[int(response) - 1], data_description="url"), True # type: ignore
print("Invalid response, please try again: " + response + "\n" + prompt)

raise ValueError("No response from AI model!")
raise ValueError("No response from AI model!")
15 changes: 15 additions & 0 deletions lens-gpt-backend/lens_gpt_backend/utils/google_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import requests


def google_search(query: str) -> list[dict[str, str | None]]:
    """Run *query* against the Google Custom Search JSON API.

    Args:
        query: Free-text search term, sent as the ``q`` request parameter.

    Returns:
        One ``{"title": ..., "link": ...}`` dict per result item, in the
        order the API returned them. An empty list is returned when the
        response carries no ``items`` entry (i.e. the search had no hits).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (bad key, exhausted quota, ...).
        requests.RequestException: On connection errors or timeout.
    """
    # Function-scope import so this block brings its own dependency along.
    import os

    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        # NOTE(review): credentials are committed to source control. The
        # literals are kept only as fallbacks so existing deployments keep
        # working; the key should be rotated and supplied exclusively via
        # the environment variables below.
        "key": os.environ.get("GOOGLE_SEARCH_API_KEY", "AIzaSyA3oA_T8h6RORQZe-3wmHGVUDIFXFm42fQ"),
        "cx": os.environ.get("GOOGLE_SEARCH_CX", "23b9ec7abc764401d"),
        "q": query,
    }

    # requests never times out by default; bound the wait so a stalled
    # connection cannot block the caller forever.
    response = requests.get(url, params=params, timeout=10)
    # Fail loudly on API errors instead of hitting a KeyError further down.
    response.raise_for_status()
    results = response.json()

    # The "items" key is missing from the payload when nothing matched.
    return [
        {"title": item["title"], "link": item["link"]}
        for item in results.get("items", [])
    ]
17 changes: 12 additions & 5 deletions lens-gpt-backend/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ <h2>Results:</h2>
return;
}



const file = fileInput.files[0];
const formData = new FormData();
formData.append('file', file);
Expand Down Expand Up @@ -57,9 +55,18 @@ <h2>Results:</h2>
timestamp.textContent = new Date().toISOString();
document.getElementById('results').appendChild(timestamp);

// Append data
const newElement = document.createElement('div');
newElement.textContent = part.replace(/^data: /, '');
// Format JSON data
let formattedData;
try {
const jsonData = JSON.parse(part.replace(/^data: /, ''));
formattedData = JSON.stringify(jsonData, null, 2);
} catch (e) {
formattedData = part.replace(/^data: /, '');
}

// Append formatted data
const newElement = document.createElement('pre');
newElement.textContent = formattedData;
document.getElementById('results').appendChild(newElement);

// Append divider
Expand Down

0 comments on commit a78c7c2

Please sign in to comment.