Kaynağa Gözat

some dorking around initial work

john melesky 8 yıl önce
ebeveyn
işleme
8c9b499261
4 değiştirilmiş dosya ile 173 ekleme ve 1 silme
  1. 1 1
      LICENSE
  2. 40 0
      examples/plain.txt
  3. 6 0
      social-skimmer/__init__.py
  4. 126 0
      social-skimmer/tester.py

+ 1 - 1
LICENSE

@@ -1,5 +1,5 @@
 ISC License:
 ISC License:
-Copyright (c) Year(s), Company or Person's Name
+Copyright (c) 2017, jmelesky
 
 
 Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
 Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
 
 

+ 40 - 0
examples/plain.txt

@@ -0,0 +1,40 @@
+{'created_at': 'Mon Jan 16 15:20:11 +0000 2017',
+ 'favorite_count': 24781,
+ 'full_text': '"We know through painful experience that freedom is never '
+              'voluntarily given by the oppressor; it must be demanded by the '
+              'oppressed.” – MLK',
+ 'hashtags': [],
+ 'id': 821014189523222530,
+ 'id_str': '821014189523222530',
+ 'lang': 'en',
+ 'retweet_count': 12578,
+ 'source': '<a href="https://about.twitter.com/products/tweetdeck" '
+           'rel="nofollow">TweetDeck</a>',
+ 'urls': [],
+ 'user': {'created_at': 'Wed Nov 17 17:53:52 +0000 2010',
+          'description': 'The political revolution is just beginning. Tweets '
+                         'by staff.',
+          'favourites_count': 972,
+          'followers_count': 4820986,
+          'following': True,
+          'friends_count': 1518,
+          'geo_enabled': True,
+          'id': 216776631,
+          'lang': 'en',
+          'listed_count': 12543,
+          'location': 'Vermont',
+          'name': 'Bernie Sanders',
+          'profile_background_color': 'EA5047',
+          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/576114811475341312/Q2-L3Yol.jpeg',
+          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/216776631/1483920645',
+          'profile_image_url': 'http://pbs.twimg.com/profile_images/794596124686487552/kqpbolIc_normal.jpg',
+          'profile_link_color': '3B94D9',
+          'profile_sidebar_fill_color': 'EFEFEF',
+          'profile_text_color': '050005',
+          'screen_name': 'BernieSanders',
+          'statuses_count': 10237,
+          'time_zone': 'Eastern Time (US & Canada)',
+          'url': 'https://t.co/jpg8Sp1GhR',
+          'utc_offset': -18000,
+          'verified': True},
+ 'user_mentions': []}

+ 6 - 0
social-skimmer/__init__.py

@@ -0,0 +1,6 @@
+# copyright 2017, John Melesky
+# see LICENSE for details
+
+__version__ = '0.0.1'
+__license__ = 'ISC'
+

+ 126 - 0
social-skimmer/tester.py

@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+import twitter
+import statistics
+import pprint
+
import os

# Twitter API credentials are read from the environment so that secrets never
# live in source control.  Required variables: TWITTER_CONSUMER_KEY,
# TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN_KEY and
# TWITTER_ACCESS_TOKEN_SECRET; a missing one raises KeyError at start-up.
# Any key that was ever committed to a repository should be treated as
# compromised and revoked.
# tweet_mode='extended' is passed straight to twitter.Api; the rest of the
# script reads each status' full_text.
api = twitter.Api(
    consumer_key=os.environ['TWITTER_CONSUMER_KEY'],
    consumer_secret=os.environ['TWITTER_CONSUMER_SECRET'],
    access_token_key=os.environ['TWITTER_ACCESS_TOKEN_KEY'],
    access_token_secret=os.environ['TWITTER_ACCESS_TOKEN_SECRET'],
    tweet_mode='extended',
)
+
+
def hist(arr, num_buckets=20):
    """Count how many values of *arr* fall into each of *num_buckets* bins.

    The range ``[min(arr), max(arr)]`` is cut into ``num_buckets`` bins of
    equal width.  Every bin is half-open (``lower <= x < upper``) except the
    last one, which also includes the maximum so that no value is dropped.

    Args:
        arr: A sized collection of numbers (it is scanned more than once).
        num_buckets: Number of bins; must be at least 1.

    Returns:
        A list of ``num_buckets`` integer counts, lowest bin first.  An
        empty *arr* gives all zeros; if every value is identical they are
        all counted in the first bin.

    Raises:
        ValueError: If *num_buckets* is smaller than 1.
    """
    if num_buckets < 1:
        raise ValueError("num_buckets must be at least 1")

    buckets = [0] * num_buckets
    # len() rather than truthiness so array-like inputs work as well.
    if len(arr) == 0:
        return buckets

    arr_min = min(arr)
    arr_max = max(arr)
    stepsize = (arr_max - arr_min) / float(num_buckets)

    # Derive each edge from its index instead of repeatedly adding stepsize:
    # accumulated float error could otherwise yield one bin too many or too
    # few, and no bins at all when stepsize is 0.
    bucketbounds = [
        (arr_min + i * stepsize, arr_min + (i + 1) * stepsize)
        for i in range(num_buckets)
    ]
    # Debug aid: show the bin edges on stdout.
    print(bucketbounds)

    last = num_buckets - 1
    for x in arr:
        if stepsize == 0:
            # All values are equal; there is only one meaningful bin.
            index = 0
        else:
            # Clamp so that the maximum (and any float overshoot) lands in
            # the last bin instead of falling off the end.
            index = min(int((x - arr_min) / stepsize), last)
        buckets[index] += 1
    return buckets
+
+
+
+
+
# Every status fetched from the home timeline, newest page first.
statuses = []
# Lowest status id seen so far; None until the first page has arrived.
min_id = None


# Fetch up to five pages of (at most) 200 statuses, walking back in time.
for _ in range(5):
    if min_id is None:
        sl = api.GetHomeTimeline(200)
    else:
        # Twitter's max_id is inclusive, so ask for ids strictly below the
        # oldest one already fetched; otherwise that status is returned
        # again at the top of every following page.
        sl = api.GetHomeTimeline(200, max_id=min_id - 1)

    if not sl:
        # Nothing older is available; ids[0] / min(ids) below would fail.
        break

    ids = [x.id for x in sl]
    print("start: %s" % ids[0])
    print("max:   %s" % max(ids))
    print("end:   %s" % ids[-1])
    print("min:   %s" % min(ids))
    print("number retrieved: %s" % len(sl))
    print("-------------")

    # Use the true minimum rather than relying on the page being sorted.
    min_id = min(ids)
    statuses += sl

print(len(statuses))
+
+
# Per-status engagement counts, in the same order as `statuses`.
fs = [status.favorite_count for status in statuses]
rs = [status.retweet_count for status in statuses]

# Summary statistics for favorites.  Each figure is computed right before it
# is printed, one line per statistic.
for template, summarize in (
    ("min favorites:      %s", min),
    ("max favorites:      %s", max),
    ("average favorites:  %s", statistics.mean),
    ("mode of favorites:  %s", statistics.mode),
    ("stddev favorites:   %s", statistics.stdev),
):
    print(template % summarize(fs))

# Text histogram: the bucket count, then one '#' per five statuses (rounded).
print("favorites histogram:")
for count in hist(fs):
    print("%04d: %s" % (count, '#' * round(count / 5.0)))


# The same report for retweets.
for template, summarize in (
    ("min retweets:      %s", min),
    ("max retweets:      %s", max),
    ("average retweets:  %s", statistics.mean),
    ("mode of retweets:  %s", statistics.mode),
    ("stddev retweets:   %s", statistics.stdev),
):
    print(template % summarize(rs))


print("retweets histogram:")
for count in hist(rs):
    print("%04d: %s" % (count, '#' * round(count / 5.0)))
+
+
+
# Rank statuses by total engagement (favorites + retweets), least engaged
# first.  The sort is in place; the code further down iterates `statuses`
# in this order.
statuses.sort(key=lambda x: (x.favorite_count + x.retweet_count))


# Dump the six most-engaged statuses, best first.  Slicing (instead of
# indexing -1 .. -6) also copes with timelines shorter than six statuses.
for top_status in reversed(statuses[-6:]):
    print("---------")
    pprint.pprint(top_status.AsDict())
+
+
+
# Visual break between the statistics above and the filtered listing below.
print("----------------------\n" * 5)


# Ids of the accounts the authenticated user follows, as a set for fast
# membership tests.
friend_ids = set(api.GetFriendIDs())

# Keep a status when it is not a retweet, or when it retweets an account
# that is itself in friend_ids.
whitelisted_statuses = []
for status in statuses:
    retweeted = status.retweeted_status
    if not retweeted or retweeted.user.id in friend_ids:
        whitelisted_statuses.append(status)

print(len(whitelisted_statuses))

for kept in whitelisted_statuses:
    print("%s: %s" % (kept.user.screen_name, kept.full_text))
+
+
+