-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFlickrCaptor.py
72 lines (59 loc) · 2.16 KB
/
FlickrCaptor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from Captor import Captor
from UnavailabilityError import UnavailibilityError
import requests
import json
class FlickrCaptor(Captor):
    """Captor implementation that fetches photo metadata from the Flickr REST API.

    Photos are looked up with ``flickr.photos.search`` for a WOE id and an
    upload-date window; every returned photo is reduced to a fixed set of
    fields (see ``_PHOTO_FIELDS``).
    """

    # Flickr serves its API over HTTPS only, so the plain-HTTP endpoint
    # can no longer be used.
    url = "https://api.flickr.com/services/rest/"
    api_key = "YOUR_API_KEY"

    # Seconds to wait for Flickr before giving up, so that a stalled
    # connection cannot block the caller forever.
    request_timeout = 30

    # Wrapper Flickr puts around JSON bodies when 'format' is 'json'.
    _JSONP_PREFIX = 'jsonFlickrApi('
    _JSONP_SUFFIX = ')'

    # Keys copied verbatim from each raw photo record into the cleaned one.
    _PHOTO_FIELDS = (
        'id',
        'owner',
        'title',
        'datetaken',
        'latitude',
        'longitude',
        'woeid',
        'place_id',
        'accuracy',
        'context',
        'tags',
    )

    class FlickrApiError(Exception):
        """Raised when a Flickr response body carries an error code."""
        pass

    def __init__(self, logger):
        """Keep the logger used to report how many result pages were found.

        NOTE(review): the base ``Captor`` initializer is not called here;
        confirm that ``Captor`` needs no initialization of its own.
        """
        self.logger = logger

    def __get_flickr_json(self, payload):
        """Send one GET request to Flickr and return the decoded JSON body.

        Args:
            payload: dict of query-string parameters for the REST call.

        Returns:
            The response body decoded into Python objects.

        Raises:
            requests.HTTPError: if the HTTP status signals a failure.
            FlickrCaptor.FlickrApiError: if the decoded body has a 'code' key.
        """
        # 'headers' is not defined in this class (presumably supplied by
        # Captor); fall back to None so the request can still be issued.
        headers = getattr(self, 'headers', None)
        r = requests.get(self.url, params=payload, headers=headers,
                         timeout=self.request_timeout)
        r.raise_for_status()

        # Remove the JSONP wrapper as an exact prefix/suffix. str.lstrip and
        # str.rstrip take a *set of characters*, so they must not be used to
        # remove the literal 'jsonFlickrApi(' ... ')' wrapper.
        body = r.text.strip()
        if body.startswith(self._JSONP_PREFIX) and body.endswith(self._JSONP_SUFFIX):
            body = body[len(self._JSONP_PREFIX):-len(self._JSONP_SUFFIX)]
        loaded_json = json.loads(body)

        if 'code' in loaded_json:
            raise self.FlickrApiError(
                '[code %s] %s' % (loaded_json['code'], loaded_json.get('message', '')))
        return loaded_json

    def __get_cleaned_json(self, photos):
        """Reduce raw photo records to the fields listed in ``_PHOTO_FIELDS``.

        Args:
            photos: iterable of dicts, one per photo.

        Returns:
            A new list of dicts holding only the selected keys.

        Raises:
            KeyError: if a photo record lacks one of the selected keys.
        """
        return [
            dict((field, photo[field]) for field in self._PHOTO_FIELDS)
            for photo in photos
        ]

    def get_data(self, min_date, max_date, woe_id):
        """Fetch every page of photos matching a place and upload window.

        Args:
            min_date: value sent to Flickr as ``min_upload_date``.
            max_date: value sent to Flickr as ``max_upload_date``.
            woe_id: value sent to Flickr as ``woe_id``.

        Returns:
            A list of cleaned photo dicts gathered from all result pages.

        Raises:
            requests.HTTPError: if any page request fails at the HTTP level.
            FlickrCaptor.FlickrApiError: if Flickr reports an API error.
        """
        payload = {
            'api_key': self.api_key,
            'method': 'flickr.photos.search',
            'format': 'json',
            'woe_id': woe_id,
            'min_upload_date': min_date,
            'max_upload_date': max_date,
            # NOTE(review): Flickr documents 'extras' as comma-delimited; the
            # embedded spaces are kept exactly as before - confirm the API
            # tolerates them.
            'extras': 'date_taken, geo, tags',
            'per_page': '500',
        }

        loaded_json = self.__get_flickr_json(payload)
        cleaned_loaded_json = self.__get_cleaned_json(loaded_json['photos']['photo'])

        # Coerce explicitly: the comparison and the '%d' format below both
        # need a number, even if the count arrives as a numeric string.
        pages = int(loaded_json['photos']['pages'])
        if pages > 1:
            self.logger.info("%d pages" % pages)
            for page in range(2, pages + 1):
                # Build a fresh dict per page so the base payload is never
                # mutated by the pagination loop.
                page_payload = dict(payload, page=page)
                loaded_json = self.__get_flickr_json(page_payload)
                cleaned_loaded_json.extend(
                    self.__get_cleaned_json(loaded_json['photos']['photo']))
        else:
            self.logger.info("1 page")
        return cleaned_loaded_json