From 0b2bcd0e01730b7619830cde764f878b284129f9 Mon Sep 17 00:00:00 2001 From: Andreas Date: Sun, 5 Nov 2017 16:54:36 +0000 Subject: [PATCH 01/10] how to install --- .gitignore | 5 +++++ README.md | 13 +++++++++++++ twitterapi-oauth.txt | 7 +++++++ 3 files changed, 25 insertions(+) create mode 100644 twitterapi-oauth.txt diff --git a/.gitignore b/.gitignore index e147773..f668b4b 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,8 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# drandreaskrueger: +env + + diff --git a/README.md b/README.md index a0acaf4..b708eed 100644 --- a/README.md +++ b/README.md @@ -44,3 +44,16 @@ Dependencies * TwitterAPI * pygeocoder * Fridge + +Install +------- + + python3 -m venv env + source env/bin/activate + python3 setup.py build + python3 setup.py install + pip3 install --upgrade setuptools wheel + pip3 install fridge pygeocoder TwitterAPI + + + diff --git a/twitterapi-oauth.txt b/twitterapi-oauth.txt new file mode 100644 index 0000000..c02b3a7 --- /dev/null +++ b/twitterapi-oauth.txt @@ -0,0 +1,7 @@ +consumer_key=YOUR_CONSUMER_KEY + +consumer_secret=YOUR_CONSUMER_SECRET + +access_token_key=YOUR_ACCESS_TOKEN + +access_token_secret=YOUR_ACCESS_TOKEN_SECRET From 6228cb2a9b1898342d712cbb59251fccd921faa1 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 16:56:23 +0000 Subject: [PATCH 02/10] ignore personalized oauth file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f668b4b..14c6009 100644 --- a/.gitignore +++ b/.gitignore @@ -38,5 +38,6 @@ nosetests.xml # drandreaskrueger: env +twitterapi-oauth.txt From f6a3a343c5a374163c0b089a2ca830d1e8d5f632 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 17:00:56 +0000 Subject: [PATCH 03/10] no user credentials in repo --- .gitignore | 1 - twitterapi-oauth.txt | 7 ------- 2 files changed, 8 deletions(-) delete mode 100644 twitterapi-oauth.txt diff --git a/.gitignore b/.gitignore index 14c6009..e2a7c6a 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,3 @@ nosetests.xml env twitterapi-oauth.txt - diff --git a/twitterapi-oauth.txt b/twitterapi-oauth.txt deleted file mode 100644 index c02b3a7..0000000 --- a/twitterapi-oauth.txt +++ /dev/null @@ -1,7 +0,0 @@ -consumer_key=YOUR_CONSUMER_KEY - -consumer_secret=YOUR_CONSUMER_SECRET - -access_token_key=YOUR_ACCESS_TOKEN - -access_token_secret=YOUR_ACCESS_TOKEN_SECRET From 35ba1c47cb9c7833bbcbaa17de57ee8b98e9e0d5 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 17:09:42 +0000 Subject: [PATCH 04/10] Twitter oauth --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b708eed..20e26e1 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,11 @@ For help: Authentication -------------- -See TwitterAPI documentation. +See TwitterAPI documentation: + +* ['create your app'](https://apps.twitter.com) +* [file format](http://pythonhosted.org/TwitterAPI/twitteroauth.html ) +* [explanations](https://developer.twitter.com/en/docs/basics/getting-started) Geocoder -------- From 2d2164af45483741fe09b93a59b07d195344d7bc Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 17:16:32 +0000 Subject: [PATCH 05/10] trying python2 because error: module 'urllib' has no attribute 'urlretrieve' --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 20e26e1..f85ddab 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,20 @@ Install python3 -m venv env source env/bin/activate - python3 setup.py build - python3 setup.py install pip3 install --upgrade setuptools wheel pip3 install fridge pygeocoder TwitterAPI + python3 setup.py build + python3 setup.py install + +or + deactivate + virtualenv env/py2 + python --version + source env/py2/bin/activate + pip install --upgrade setuptools wheel + pip install fridge pygeocoder TwitterAPI + python setup.py build + python setup.py install From 6cb7373527fba16f270decf320f8bb6f3327dcf7 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 17:24:59 +0000 Subject: [PATCH 06/10] got it working hooray --- .gitignore | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index e2a7c6a..c84bb3f 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,4 @@ nosetests.xml # drandreaskrueger: env twitterapi-oauth.txt - +photos/ diff --git a/README.md b/README.md index f85ddab..7f08c61 100644 --- a/README.md +++ b/README.md @@ -52,14 +52,14 @@ Dependencies Install ------- - python3 -m venv env - source env/bin/activate + python3 -m venv env/py3 + source env/py3/bin/activate pip3 install --upgrade setuptools wheel pip3 install fridge pygeocoder TwitterAPI python3 setup.py build python3 setup.py install -or +or python2.7 deactivate virtualenv env/py2 From 0deb04615ddc0dc3b8203f5999c702c8b9413928 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 17:46:08 +0000 Subject: [PATCH 07/10] how to catch and ignore it failing with *** STOPPED ('The read operation timed out',) --- TwitterGeoPics/SearchOldTweets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TwitterGeoPics/SearchOldTweets.py b/TwitterGeoPics/SearchOldTweets.py index 75217e6..31dd3fd 100644 --- a/TwitterGeoPics/SearchOldTweets.py +++ b/TwitterGeoPics/SearchOldTweets.py @@ -102,6 +102,6 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): except KeyboardInterrupt: print('\nTerminated by user\n') except Exception as e: - print('*** STOPPED %s\n' % e) + print('*** STOPPED %s %s\n' % (type(e), e)) GEO.print_stats() From 477d88d342c9cfb4eca5efeb7f788c09253a36fa Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 19:05:31 +0000 Subject: [PATCH 08/10] unique filenames, ignore exceptions, parse-able printing, new switch no_images_of_retweets --- TwitterGeoPics/SearchOldTweets.py | 53 ++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/TwitterGeoPics/SearchOldTweets.py b/TwitterGeoPics/SearchOldTweets.py index 31dd3fd..3980780 100644 --- a/TwitterGeoPics/SearchOldTweets.py +++ b/TwitterGeoPics/SearchOldTweets.py @@ -9,20 +9,38 @@ import sys from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager import urllib - +import datetime GEO = Geocoder() +def parse_date(status): + """ + expects date in this strange format: Sun Nov 05 17:14:42 +0000 2017 + FIXME: try with other twitter timezones please. Might need %z ? + TODO: Ending downloads as soon as cutoff datetime is reached? + """ + return datetime.datetime.strptime(status['created_at'], + '%a %b %d %H:%M:%S +0000 %Y') + +def unique_name(status): + """ + Unique filename for images, concatenating screen_name and timestamp + """ + screen_name = status['user']['screen_name'] + when = parse_date(status).strftime('%Y%m%d-%H%M%S') + file_name = screen_name + "_" + when + return file_name def download_photo(status, photo_dir): """Download photo(s) from embedded url(s).""" if 'media' in status['entities']: for media in status['entities'].get('media'): if media['type'] == 'photo': + file_name = unique_name(status) photo_url = media['media_url_https'] - screen_name = status['user']['screen_name'] - file_name = os.path.join(photo_dir, screen_name) + '.' + photo_url.split('.')[-1] - urllib.urlretrieve(photo_url, file_name) + file_name += '.' + photo_url.split('.')[-1] + urllib.urlretrieve(photo_url, os.path.join(photo_dir, file_name)) + print ("IMAGE: %s" % file_name) def lookup_geocode(status): @@ -37,16 +55,20 @@ def lookup_geocode(status): print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request) -def process_tweet(status, photo_dir, stalk): - print('\n%s: %s' % (status['user']['screen_name'], status['text'])) - print(status['created_at']) - if photo_dir: - download_photo(status, photo_dir) - if stalk: - lookup_geocode(status) +def process_tweet(status, photo_dir, stalk, no_images_of_retweets): + print('\nUSER: %s\nTWEET: %s' % (status['user']['screen_name'], status['text'])) + print('DATE: %s' % status['created_at']) + + try: + if photo_dir and not no_images_of_retweets: + download_photo(status, photo_dir) + if stalk: + lookup_geocode(status) + except Exception as e: + print ("ALERT exception ignored: %s %s" % (type(e), e)) -def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): +def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, no_images_of_retweets, count): """Get tweets containing any words in 'word_list'.""" words = ' OR '.join(word_list) params = {'q':words, 'count':count} @@ -57,7 +79,7 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): for item in pager.get_iterator(): if 'text' in item: if not no_retweets or not item.has_key('retweeted_status'): - process_tweet(item, photo_dir, stalk) + process_tweet(item, photo_dir, stalk, no_images_of_retweets) elif 'message' in item: if item['code'] == 131: continue # ignore internal server error @@ -79,7 +101,8 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): parser.add_argument('-count', type=int, default=15, help='download batch size') parser.add_argument('-location', type=str, help='limit tweets to a place') parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file') - parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets') + parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets completely') + parser.add_argument('-no_images_of_retweets', action='store_true', help='exclude re-tweet images') parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory') parser.add_argument('-stalk', action='store_true', help='print tweet location') parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search') @@ -98,7 +121,7 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count): print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius)) else: region = None - search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.count) + search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.no_images_of_retweets, args.count) except KeyboardInterrupt: print('\nTerminated by user\n') except Exception as e: From a581ea26342a4e26a4a38150f1fe1b8baa476752 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 19:18:11 +0000 Subject: [PATCH 09/10] boolean corrected --- TwitterGeoPics/SearchOldTweets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TwitterGeoPics/SearchOldTweets.py b/TwitterGeoPics/SearchOldTweets.py index 3980780..1a19135 100644 --- a/TwitterGeoPics/SearchOldTweets.py +++ b/TwitterGeoPics/SearchOldTweets.py @@ -60,7 +60,7 @@ def process_tweet(status, photo_dir, stalk, no_images_of_retweets): print('DATE: %s' % status['created_at']) try: - if photo_dir and not no_images_of_retweets: + if photo_dir and not (no_images_of_retweets and status.has_key('retweeted_status')): download_photo(status, photo_dir) if stalk: lookup_geocode(status) From 0609dddb751a269efcef520e5e07a3c9cd3cb3a0 Mon Sep 17 00:00:00 2001 From: drandreaskrueger Date: Sun, 5 Nov 2017 20:14:10 +0000 Subject: [PATCH 10/10] time ordering makes more sense than by user handle --- TwitterGeoPics/SearchOldTweets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/TwitterGeoPics/SearchOldTweets.py b/TwitterGeoPics/SearchOldTweets.py index 1a19135..5599fa5 100644 --- a/TwitterGeoPics/SearchOldTweets.py +++ b/TwitterGeoPics/SearchOldTweets.py @@ -28,7 +28,9 @@ def unique_name(status): """ screen_name = status['user']['screen_name'] when = parse_date(status).strftime('%Y%m%d-%H%M%S') - file_name = screen_name + "_" + when + # file_name = screen_name + "_" + when + # file_name = when + "_" + screen_name + file_name = when + "_" + screen_name return file_name def download_photo(status, photo_dir):