diff --git a/.gitignore b/.gitignore index e147773..c84bb3f 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,8 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# drandreaskrueger: +env +twitterapi-oauth.txt +photos/ diff --git a/README.md b/README.md index a0acaf4..7f08c61 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,11 @@ For help: Authentication -------------- -See TwitterAPI documentation. +See TwitterAPI documentation: + +* ['create your app'](https://apps.twitter.com) +* [file format](http://pythonhosted.org/TwitterAPI/twitteroauth.html ) +* [explanations](https://developer.twitter.com/en/docs/basics/getting-started) Geocoder -------- @@ -44,3 +48,26 @@ Dependencies * TwitterAPI * pygeocoder * Fridge + +Install +------- + + python3 -m venv env/py3 + source env/py3/bin/activate + pip3 install --upgrade setuptools wheel + pip3 install fridge pygeocoder TwitterAPI + python3 setup.py build + python3 setup.py install + +or python2.7 + + deactivate + virtualenv env/py2 + python --version + source env/py2/bin/activate + pip install --upgrade setuptools wheel + pip install fridge pygeocoder TwitterAPI + python setup.py build + python setup.py install + + diff --git a/TwitterGeoPics/SearchOldTweets.py b/TwitterGeoPics/SearchOldTweets.py index 75217e6..5599fa5 100644 --- a/TwitterGeoPics/SearchOldTweets.py +++ b/TwitterGeoPics/SearchOldTweets.py @@ -9,20 +9,40 @@ import sys from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager import urllib - +import datetime GEO = Geocoder() +def parse_date(status): + """ + expects date in this strange format: Sun Nov 05 17:14:42 +0000 2017 + FIXME: try with other twitter timezones please. Might need %z ? + TODO: Ending downloads as soon as cutoff datetime is reached? + """ + return datetime.datetime.strptime(status['created_at'], + '%a %b %d %H:%M:%S +0000 %Y') + +def unique_name(status): + """ + Unique filename for images, concatenating screen_name and timestamp + """ + screen_name = status['user']['screen_name'] + when = parse_date(status).strftime('%Y%m%d-%H%M%S') + # file_name = screen_name + "_" + when + # file_name = when + "_" + screen_name + file_name = when + "_" + screen_name + return file_name def download_photo(status, photo_dir): """Download photo(s) from embedded url(s).""" if 'media' in status['entities']: for media in status['entities'].get('media'): if media['type'] == 'photo': + file_name = unique_name(status) photo_url = media['media_url_https'] - screen_name = status['user']['screen_name'] - file_name = os.path.join(photo_dir, screen_name) + '.' + photo_url.split('.')[-1] - urllib.urlretrieve(photo_url, file_name) + file_name += '.' + photo_url.split('.')[-1] + urllib.urlretrieve(photo_url, os.path.join(photo_dir, file_name)) + print ("IMAGE: %s" % file_name) def lookup_geocode(status): @@ -37,16 +57,20 @@ def lookup_geocode(status): print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request) -def process_tweet(status, photo_dir, stalk): - print('\n%s: %s' % (status['user']['screen_name'], status['text'])) - print(status['created_at']) - if photo_dir: - download_photo(status, photo_dir) - if stalk: - lookup_geocode(status) +def process_tweet(status, photo_dir, stalk, no_images_of_retweets): + print('\nUSER: %s\nTWEET: %s' % (status['user']['screen_name'], status['text'])) + print('DATE: %s' % status['created_at']) + + try: + if photo_dir and not (no_images_of_retweets and status.has_key('retweeted_status')): + download_photo(status, photo_dir) + if stalk: + lookup_geocode(status) + except Exception as e: + print ("ALERT exception ignored: %s %s" % (type(e), e)) -def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): +def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, no_images_of_retweets, count): """Get tweets containing any words in 'word_list'.""" words = ' OR '.join(word_list) params = {'q':words, 'count':count} @@ -57,7 +81,7 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): for item in pager.get_iterator(): if 'text' in item: if not no_retweets or not item.has_key('retweeted_status'): - process_tweet(item, photo_dir, stalk) + process_tweet(item, photo_dir, stalk, no_images_of_retweets) elif 'message' in item: if item['code'] == 131: continue # ignore internal server error @@ -79,7 +103,8 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): parser.add_argument('-count', type=int, default=15, help='download batch size') parser.add_argument('-location', type=str, help='limit tweets to a place') parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') + parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets completely') + parser.add_argument('-no_images_of_retweets', action='store_true', help='exclude re-tweet images') parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') parser.add_argument('-stalk', action='store_true', help='print tweet location') parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search') @@ -98,10 +123,10 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius)) else: region = None - search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.count) + search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.no_images_of_retweets, args.count) except KeyboardInterrupt: print('\nTerminated by user\n') except Exception as e: - print('*** STOPPED %s\n' % e) + print('*** STOPPED %s %s\n' % (type(e), e)) GEO.print_stats()