Эх сурвалжийг харах

some dorking around initial work

john melesky 8 жил өмнө
parent
commit
8c9b499261

+ 1 - 1
LICENSE

@@ -1,5 +1,5 @@
 ISC License:
-Copyright (c) Year(s), Company or Person's Name
+Copyright (c) 2017, jmelesky
 
 Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
 

+ 40 - 0
examples/plain.txt

@@ -0,0 +1,40 @@
+{'created_at': 'Mon Jan 16 15:20:11 +0000 2017',
+ 'favorite_count': 24781,
+ 'full_text': '"We know through painful experience that freedom is never '
+              'voluntarily given by the oppressor; it must be demanded by the '
+              'oppressed.” – MLK',
+ 'hashtags': [],
+ 'id': 821014189523222530,
+ 'id_str': '821014189523222530',
+ 'lang': 'en',
+ 'retweet_count': 12578,
+ 'source': '<a href="https://about.twitter.com/products/tweetdeck" '
+           'rel="nofollow">TweetDeck</a>',
+ 'urls': [],
+ 'user': {'created_at': 'Wed Nov 17 17:53:52 +0000 2010',
+          'description': 'The political revolution is just beginning. Tweets '
+                         'by staff.',
+          'favourites_count': 972,
+          'followers_count': 4820986,
+          'following': True,
+          'friends_count': 1518,
+          'geo_enabled': True,
+          'id': 216776631,
+          'lang': 'en',
+          'listed_count': 12543,
+          'location': 'Vermont',
+          'name': 'Bernie Sanders',
+          'profile_background_color': 'EA5047',
+          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/576114811475341312/Q2-L3Yol.jpeg',
+          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/216776631/1483920645',
+          'profile_image_url': 'http://pbs.twimg.com/profile_images/794596124686487552/kqpbolIc_normal.jpg',
+          'profile_link_color': '3B94D9',
+          'profile_sidebar_fill_color': 'EFEFEF',
+          'profile_text_color': '050005',
+          'screen_name': 'BernieSanders',
+          'statuses_count': 10237,
+          'time_zone': 'Eastern Time (US & Canada)',
+          'url': 'https://t.co/jpg8Sp1GhR',
+          'utc_offset': -18000,
+          'verified': True},
+ 'user_mentions': []}

+ 6 - 0
social-skimmer/__init__.py

@@ -0,0 +1,6 @@
+# copyright 2017, John Melesky
+# see LICENSE for details
+
+# Package metadata: release version string and the license identifier.
+__version__ = '0.0.1'
+__license__ = 'ISC'
+

+ 126 - 0
social-skimmer/tester.py

@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+import os
+import pprint
+import statistics
+
+import twitter
+
+# Twitter API credentials are read from the environment so that no secret is
+# stored in the repository.  Required variables:
+#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
+#   TWITTER_ACCESS_TOKEN_KEY, TWITTER_ACCESS_TOKEN_SECRET
+# A missing variable raises KeyError immediately instead of failing later
+# with an opaque authentication error.
+# NOTE(review): any key or token that was ever committed in place of these
+# lookups should be treated as compromised and revoked.
+api = twitter.Api(
+    consumer_key=os.environ['TWITTER_CONSUMER_KEY'],
+    consumer_secret=os.environ['TWITTER_CONSUMER_SECRET'],
+    access_token_key=os.environ['TWITTER_ACCESS_TOKEN_KEY'],
+    access_token_secret=os.environ['TWITTER_ACCESS_TOKEN_SECRET'],
+    tweet_mode='extended',
+)
+
+
+def hist(arr, num_buckets=20):
+    """Count the values of ``arr`` into ``num_buckets`` equal-width buckets.
+
+    The buckets partition the closed range ``[min(arr), max(arr)]``.  Each
+    bucket includes its lower bound and excludes its upper bound, except the
+    last one, which also includes the maximum so that every value is counted
+    exactly once.
+
+    Args:
+        arr: A sized, re-iterable collection of numbers.
+        num_buckets: Number of buckets to produce; must be at least 1.
+
+    Returns:
+        A list of ``num_buckets`` integer counts, lowest bucket first.  An
+        empty ``arr`` yields all zeros; if every value is identical, all of
+        them are counted in the first bucket.
+
+    Raises:
+        ValueError: If ``num_buckets`` is smaller than 1.
+    """
+    if num_buckets < 1:
+        raise ValueError("num_buckets must be at least 1")
+
+    buckets = [0] * num_buckets
+    if len(arr) == 0:
+        return buckets
+
+    arr_min = min(arr)
+    span = max(arr) - arr_min
+    if span == 0:
+        # Zero-width range: there is no meaningful bucket width, so put
+        # everything in the first bucket rather than dividing by zero.
+        buckets[0] = len(arr)
+        return buckets
+
+    last = num_buckets - 1
+    for x in arr:
+        # Compute the bucket index directly instead of accumulating float
+        # bucket bounds, which can drift and yield the wrong bucket count.
+        index = int((x - arr_min) * num_buckets / span)
+        # The maximum maps to index == num_buckets; fold it into the last
+        # bucket so it is not dropped.
+        buckets[min(index, last)] += 1
+    return buckets
+
+
+
+
+
+# Collect up to five pages (200 statuses each) of the home timeline, walking
+# backwards in time from the newest status.
+statuses = []
+min_id = None  # smallest status id seen so far; None until the first page
+
+for _ in range(5):
+    # max_id is inclusive, so ask for ids strictly below the smallest one
+    # already fetched; otherwise that status is returned again on every page.
+    page = api.GetHomeTimeline(
+        count=200,
+        max_id=None if min_id is None else min_id - 1,
+    )
+    if not page:
+        # Timeline exhausted: nothing older is available.
+        break
+
+    ids = [status.id for status in page]
+    print("start: %s" % ids[0])
+    print("max:   %s" % max(ids))
+    print("end:   %s" % ids[-1])
+    print("min:   %s" % min(ids))
+    print("number retrieved: %s" % len(page))
+    print("-------------")
+
+    # Use the true minimum rather than relying on the page being sorted.
+    min_id = min(ids)
+    statuses += page
+
+print(len(statuses))
+
+
+def _stat_or_na(func, values):
+    """Return ``func(values)``, or ``"n/a"`` if it raises StatisticsError."""
+    try:
+        return func(values)
+    except statistics.StatisticsError:
+        return "n/a"
+
+
+def _print_summary(label, values):
+    """Print summary statistics and a text histogram for ``values``.
+
+    ``label`` is the plural noun used in the output lines (for example
+    "favorites").  Nothing but a short notice is printed when ``values`` is
+    empty, because min()/max()/mean() all raise on an empty sequence.
+    """
+    if not values:
+        print("no %s to summarize" % label)
+        return
+
+    print("min %s:      %s" % (label, min(values)))
+    print("max %s:      %s" % (label, max(values)))
+    print("average %s:  %s" % (label, statistics.mean(values)))
+    # mode() raises StatisticsError for multimodal data before Python 3.8,
+    # and stdev() needs at least two values; report "n/a" instead of aborting.
+    print("mode of %s:  %s" % (label, _stat_or_na(statistics.mode, values)))
+    print("stddev %s:   %s" % (label, _stat_or_na(statistics.stdev, values)))
+
+    print("%s histogram:" % label)
+    for count in hist(values):
+        # One '#' per five values in the bucket (rounded).
+        print("%04d: %s" % (count, '#' * round(count / 5.0)))
+
+
+# Per-status engagement counts, in timeline order.
+fs = [status.favorite_count for status in statuses]
+rs = [status.retweet_count for status in statuses]
+
+_print_summary("favorites", fs)
+_print_summary("retweets", rs)
+
+
+
+# Order statuses by combined engagement (favorites + retweets), ascending.
+statuses.sort(key=lambda x: (x.favorite_count + x.retweet_count))
+
+# Dump the six most-engaged statuses, highest first.  A reversed slice is
+# used instead of indexing [-1] .. [-6] so that a timeline with fewer than
+# six statuses prints what is available rather than raising IndexError.
+for status in statuses[:-7:-1]:
+    print("---------")
+    pprint.pprint(status.AsDict())
+
+
+
+print("----------------------\n" * 5)
+
+# Ids of the accounts the authenticated user follows.
+friend_ids = set(api.GetFriendIDs())
+
+# Keep original statuses, and retweets only when the retweeted author is
+# also followed; drop retweets of accounts outside the friend list.
+whitelisted_statuses = []
+for s in statuses:
+    retweeted = s.retweeted_status
+    if retweeted and retweeted.user.id not in friend_ids:
+        continue
+    whitelisted_statuses.append(s)
+
+print(len(whitelisted_statuses))
+
+for s in whitelisted_statuses:
+    line = (s.user.screen_name, s.full_text)
+    print("%s: %s" % line)
+
+
+