#%% Libraries
import ast
import json
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import statsmodels.api as sm
import tweepy
#%% Load Twitter Archive data
# Read the enhanced Twitter archive CSV from the working directory.
df_archive = pd.read_csv(filepath_or_buffer='twitter-archive-enhanced.csv')
#%% Get Image Prediction data
# Download the image-predictions TSV, save it next to the script under the
# last path component of the URL, and load it into a DataFrame.
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as data.
response.raise_for_status()
file_name = url.split('/')[-1]
# Write the raw bytes so the file on disk is exactly what the server sent
# (no re-encoding with the platform default, no newline translation).
with open(file_name, 'wb') as f:
    f.write(response.content)
# Read back the file that was just written rather than repeating its name.
df_predictions = pd.read_csv(file_name, sep='\t')
#%% Get Twitter keys
# twitter_keys.txt is expected to hold a Python dict literal; the keys
# 'consumer_key', 'consumer_secret', 'access_token' and
# 'access_token_secret' are read from it when the API object is built.
with open('twitter_keys.txt', 'r') as f:
    twitter_keys = f.read()
# Convert dictionary string to dictionary.
# NOTE(security): this used to be eval(), which would execute any code found
# in the file. ast.literal_eval only accepts Python literals (dict, str,
# numbers, ...) and raises ValueError/SyntaxError for anything else.
twitter_keys = ast.literal_eval(twitter_keys.strip())
#%% Create Twitter API object
# Build the OAuth handler from the consumer credentials, attach the access
# token pair, then create the API client with both rate-limit flags enabled.
auth = tweepy.OAuthHandler(
    twitter_keys['consumer_key'],
    twitter_keys['consumer_secret'],
)
auth.set_access_token(
    twitter_keys['access_token'],
    twitter_keys['access_token_secret'],
)
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)
#%% Load Tweet ids from Archive
# Collect the tweet_id column of the archive as a plain Python list.
tweet_ids = df_archive['tweet_id'].tolist()
#%% Get tweets and save JSON strings to file
# Request every tweet id through the API and store one JSON document per
# line in tweet_json.txt. Ids the API cannot return are reported and counted
# but do not stop the run.
count_attempt = 0
count_failed = 0
with open('tweet_json.txt', 'w', encoding='utf-8') as file:
    for count_attempt, tw_id in enumerate(tweet_ids, start=1):
        try:
            # Keep only the API call inside the try so that unrelated errors
            # (file writes, serialisation) are not mistaken for a bad tweet.
            tweet = api.get_status(tw_id, tweet_mode='extended')
        except tweepy.TweepError:
            # Catch tweepy's own error type only: a bare `except:` would also
            # swallow KeyboardInterrupt and make this long loop unstoppable.
            print('Problem getting tweet:', tw_id)
            count_failed += 1
        else:
            file.write(json.dumps(tweet._json) + '\n')
        print('Number tried: {}. Number failed: {}.'.format(count_attempt, count_failed))
        time.sleep(1.1)  # number of seconds to pause program
Number tried: 1. Number failed: 0. Number tried: 2. Number failed: 0. Number tried: 3. Number failed: 0. Number tried: 4. Number failed: 0. Number tried: 5. Number failed: 0. Number tried: 6. Number failed: 0. Number tried: 7. Number failed: 0. Number tried: 8. Number failed: 0. Number tried: 9. Number failed: 0. Number tried: 10. Number failed: 0. Number tried: 11. Number failed: 0. Number tried: 12. Number failed: 0. Number tried: 13. Number failed: 0. Number tried: 14. Number failed: 0. Number tried: 15. Number failed: 0. Number tried: 16. Number failed: 0. Number tried: 17. Number failed: 0. Number tried: 18. Number failed: 0. Number tried: 19. Number failed: 0. Problem getting tweet: 888202515573088257 Number tried: 20. Number failed: 1. Number tried: 21. Number failed: 1. Number tried: 22. Number failed: 1. Number tried: 23. Number failed: 1. Number tried: 24. Number failed: 1. Number tried: 25. Number failed: 1. Number tried: 26. Number failed: 1. Number tried: 27. Number failed: 1. Number tried: 28. Number failed: 1. Number tried: 29. Number failed: 1. Number tried: 30. Number failed: 1. Number tried: 31. Number failed: 1. Number tried: 32. Number failed: 1. Number tried: 33. Number failed: 1. Number tried: 34. Number failed: 1. Number tried: 35. Number failed: 1. Number tried: 36. Number failed: 1. Number tried: 37. Number failed: 1. Number tried: 38. Number failed: 1. Number tried: 39. Number failed: 1. Number tried: 40. Number failed: 1. Number tried: 41. Number failed: 1. Number tried: 42. Number failed: 1. Number tried: 43. Number failed: 1. Number tried: 44. Number failed: 1. Number tried: 45. Number failed: 1. Number tried: 46. Number failed: 1. Number tried: 47. Number failed: 1. Number tried: 48. Number failed: 1. Number tried: 49. Number failed: 1. Number tried: 50. Number failed: 1. Number tried: 51. Number failed: 1. Number tried: 52. Number failed: 1. Number tried: 53. Number failed: 1. Number tried: 54. Number failed: 1. Number tried: 55. 
Number failed: 1. Number tried: 56. Number failed: 1. Number tried: 57. Number failed: 1. Number tried: 58. Number failed: 1. Number tried: 59. Number failed: 1. Number tried: 60. Number failed: 1. Number tried: 61. Number failed: 1. Number tried: 62. Number failed: 1. Number tried: 63. Number failed: 1. Number tried: 64. Number failed: 1. Number tried: 65. Number failed: 1. Number tried: 66. Number failed: 1. Number tried: 67. Number failed: 1. Number tried: 68. Number failed: 1. Number tried: 69. Number failed: 1. Number tried: 70. Number failed: 1. Number tried: 71. Number failed: 1. Number tried: 72. Number failed: 1. Number tried: 73. Number failed: 1. Number tried: 74. Number failed: 1. Number tried: 75. Number failed: 1. Number tried: 76. Number failed: 1. Number tried: 77. Number failed: 1. Number tried: 78. Number failed: 1. Number tried: 79. Number failed: 1. Number tried: 80. Number failed: 1. Number tried: 81. Number failed: 1. Number tried: 82. Number failed: 1. Number tried: 83. Number failed: 1. Number tried: 84. Number failed: 1. Number tried: 85. Number failed: 1. Number tried: 86. Number failed: 1. Number tried: 87. Number failed: 1. Number tried: 88. Number failed: 1. Number tried: 89. Number failed: 1. Number tried: 90. Number failed: 1. Number tried: 91. Number failed: 1. Number tried: 92. Number failed: 1. Number tried: 93. Number failed: 1. Number tried: 94. Number failed: 1. Number tried: 95. Number failed: 1. Problem getting tweet: 873697596434513921 Number tried: 96. Number failed: 2. Number tried: 97. Number failed: 2. Number tried: 98. Number failed: 2. Number tried: 99. Number failed: 2. Number tried: 100. Number failed: 2. Number tried: 101. Number failed: 2. Problem getting tweet: 872668790621863937 Number tried: 102. Number failed: 3. Number tried: 103. Number failed: 3. Number tried: 104. Number failed: 3. Problem getting tweet: 872261713294495745 Number tried: 105. Number failed: 4. Number tried: 106. Number failed: 4. 
Number tried: 107. Number failed: 4. Number tried: 108. Number failed: 4. Number tried: 109. Number failed: 4. Number tried: 110. Number failed: 4. Number tried: 111. Number failed: 4. Number tried: 112. Number failed: 4. Number tried: 113. Number failed: 4. Number tried: 114. Number failed: 4. Number tried: 115. Number failed: 4. Number tried: 116. Number failed: 4. Number tried: 117. Number failed: 4. Number tried: 118. Number failed: 4. Problem getting tweet: 869988702071779329 Number tried: 119. Number failed: 5. Number tried: 120. Number failed: 5. Number tried: 121. Number failed: 5. Number tried: 122. Number failed: 5. Number tried: 123. Number failed: 5. Number tried: 124. Number failed: 5. Number tried: 125. Number failed: 5. Number tried: 126. Number failed: 5. Number tried: 127. Number failed: 5. Number tried: 128. Number failed: 5. Number tried: 129. Number failed: 5. Number tried: 130. Number failed: 5. Number tried: 131. Number failed: 5. Number tried: 132. Number failed: 5. Problem getting tweet: 866816280283807744 Number tried: 133. Number failed: 6. Number tried: 134. Number failed: 6. Number tried: 135. Number failed: 6. Number tried: 136. Number failed: 6. Number tried: 137. Number failed: 6. Number tried: 138. Number failed: 6. Number tried: 139. Number failed: 6. Number tried: 140. Number failed: 6. Number tried: 141. Number failed: 6. Number tried: 142. Number failed: 6. Number tried: 143. Number failed: 6. Number tried: 144. Number failed: 6. Number tried: 145. Number failed: 6. Number tried: 146. Number failed: 6. Number tried: 147. Number failed: 6. Number tried: 148. Number failed: 6. Number tried: 149. Number failed: 6. Number tried: 150. Number failed: 6. Number tried: 151. Number failed: 6. Number tried: 152. Number failed: 6. Number tried: 153. Number failed: 6. Number tried: 154. Number failed: 6. Number tried: 155. Number failed: 6. Problem getting tweet: 861769973181624320 Number tried: 156. Number failed: 7. Number tried: 157. 
Number failed: 7. Number tried: 158. Number failed: 7. Number tried: 159. Number failed: 7. Number tried: 160. Number failed: 7. Number tried: 161. Number failed: 7. Number tried: 162. Number failed: 7. Number tried: 163. Number failed: 7. Number tried: 164. Number failed: 7. Number tried: 165. Number failed: 7. Number tried: 166. Number failed: 7. Number tried: 167. Number failed: 7. Number tried: 168. Number failed: 7. Number tried: 169. Number failed: 7. Number tried: 170. Number failed: 7. Number tried: 171. Number failed: 7. Number tried: 172. Number failed: 7. Number tried: 173. Number failed: 7. Number tried: 174. Number failed: 7. Number tried: 175. Number failed: 7. Number tried: 176. Number failed: 7. Number tried: 177. Number failed: 7. Number tried: 178. Number failed: 7. Number tried: 179. Number failed: 7. Number tried: 180. Number failed: 7. Number tried: 181. Number failed: 7. Number tried: 182. Number failed: 7. Problem getting tweet: 856602993587888130 Number tried: 183. Number failed: 8. Number tried: 184. Number failed: 8. Number tried: 185. Number failed: 8. Number tried: 186. Number failed: 8. Number tried: 187. Number failed: 8. Number tried: 188. Number failed: 8. Number tried: 189. Number failed: 8. Number tried: 190. Number failed: 8. Number tried: 191. Number failed: 8. Number tried: 192. Number failed: 8. Number tried: 193. Number failed: 8. Number tried: 194. Number failed: 8. Number tried: 195. Number failed: 8. Number tried: 196. Number failed: 8. Number tried: 197. Number failed: 8. Number tried: 198. Number failed: 8. Number tried: 199. Number failed: 8. Number tried: 200. Number failed: 8. Number tried: 201. Number failed: 8. Number tried: 202. Number failed: 8. Number tried: 203. Number failed: 8. Number tried: 204. Number failed: 8. Number tried: 205. Number failed: 8. Number tried: 206. Number failed: 8. Number tried: 207. Number failed: 8. Number tried: 208. Number failed: 8. Number tried: 209. Number failed: 8. 
Number tried: 210. Number failed: 8. Number tried: 211. Number failed: 8. Problem getting tweet: 851953902622658560 Number tried: 212. Number failed: 9. Number tried: 213. Number failed: 9. Number tried: 214. Number failed: 9. Number tried: 215. Number failed: 9. Number tried: 216. Number failed: 9. Number tried: 217. Number failed: 9. Number tried: 218. Number failed: 9. Number tried: 219. Number failed: 9. Number tried: 220. Number failed: 9. Number tried: 221. Number failed: 9. Number tried: 222. Number failed: 9. Number tried: 223. Number failed: 9. Number tried: 224. Number failed: 9. Number tried: 225. Number failed: 9. Number tried: 226. Number failed: 9. Number tried: 227. Number failed: 9. Number tried: 228. Number failed: 9. Number tried: 229. Number failed: 9. Number tried: 230. Number failed: 9. Number tried: 231. Number failed: 9. Number tried: 232. Number failed: 9. Number tried: 233. Number failed: 9. Number tried: 234. Number failed: 9. Number tried: 235. Number failed: 9. Number tried: 236. Number failed: 9. Number tried: 237. Number failed: 9. Number tried: 238. Number failed: 9. Number tried: 239. Number failed: 9. Number tried: 240. Number failed: 9. Number tried: 241. Number failed: 9. Number tried: 242. Number failed: 9. Number tried: 243. Number failed: 9. Number tried: 244. Number failed: 9. Number tried: 245. Number failed: 9. Number tried: 246. Number failed: 9. Number tried: 247. Number failed: 9. Problem getting tweet: 845459076796616705 Number tried: 248. Number failed: 10. Number tried: 249. Number failed: 10. Number tried: 250. Number failed: 10. Number tried: 251. Number failed: 10. Number tried: 252. Number failed: 10. Number tried: 253. Number failed: 10. Problem getting tweet: 844704788403113984 Number tried: 254. Number failed: 11. Number tried: 255. Number failed: 11. Number tried: 256. Number failed: 11. Number tried: 257. Number failed: 11. Number tried: 258. Number failed: 11. Number tried: 259. Number failed: 11. 
Number tried: 260. Number failed: 11. Problem getting tweet: 842892208864923648 Number tried: 261. Number failed: 12. Number tried: 262. Number failed: 12. Number tried: 263. Number failed: 12. Number tried: 264. Number failed: 12. Number tried: 265. Number failed: 12. Number tried: 266. Number failed: 12. Number tried: 267. Number failed: 12. Number tried: 268. Number failed: 12. Number tried: 269. Number failed: 12. Number tried: 270. Number failed: 12. Number tried: 271. Number failed: 12. Number tried: 272. Number failed: 12. Number tried: 273. Number failed: 12. Number tried: 274. Number failed: 12. Number tried: 275. Number failed: 12. Number tried: 276. Number failed: 12. Number tried: 277. Number failed: 12. Number tried: 278. Number failed: 12. Number tried: 279. Number failed: 12. Number tried: 280. Number failed: 12. Number tried: 281. Number failed: 12. Number tried: 282. Number failed: 12. Number tried: 283. Number failed: 12. Number tried: 284. Number failed: 12. Number tried: 285. Number failed: 12. Number tried: 286. Number failed: 12. Number tried: 287. Number failed: 12. Number tried: 288. Number failed: 12. Number tried: 289. Number failed: 12. Number tried: 290. Number failed: 12. Number tried: 291. Number failed: 12. Number tried: 292. Number failed: 12. Number tried: 293. Number failed: 12. Number tried: 294. Number failed: 12. Number tried: 295. Number failed: 12. Number tried: 296. Number failed: 12. Problem getting tweet: 837366284874571778 Number tried: 297. Number failed: 13. Number tried: 298. Number failed: 13. Problem getting tweet: 837012587749474308 Number tried: 299. Number failed: 14. Number tried: 300. Number failed: 14. Number tried: 301. Number failed: 14. Number tried: 302. Number failed: 14. Number tried: 303. Number failed: 14. Number tried: 304. Number failed: 14. Number tried: 305. Number failed: 14. Number tried: 306. Number failed: 14. Number tried: 307. Number failed: 14. Number tried: 308. Number failed: 14. 
Number tried: 309. Number failed: 14. Number tried: 310. Number failed: 14. Number tried: 311. Number failed: 14. Number tried: 312. Number failed: 14. Number tried: 313. Number failed: 14. Number tried: 314. Number failed: 14. Number tried: 315. Number failed: 14. Number tried: 316. Number failed: 14. Number tried: 317. Number failed: 14. Number tried: 318. Number failed: 14. Number tried: 319. Number failed: 14. Number tried: 320. Number failed: 14. Number tried: 321. Number failed: 14. Number tried: 322. Number failed: 14. Number tried: 323. Number failed: 14. Number tried: 324. Number failed: 14. Number tried: 325. Number failed: 14. Number tried: 326. Number failed: 14. Number tried: 327. Number failed: 14. Number tried: 328. Number failed: 14. Number tried: 329. Number failed: 14. Number tried: 330. Number failed: 14. Number tried: 331. Number failed: 14. Number tried: 332. Number failed: 14. Number tried: 333. Number failed: 14. Number tried: 334. Number failed: 14. Number tried: 335. Number failed: 14. Number tried: 336. Number failed: 14. Number tried: 337. Number failed: 14. Number tried: 338. Number failed: 14. Number tried: 339. Number failed: 14. Number tried: 340. Number failed: 14. Number tried: 341. Number failed: 14. Number tried: 342. Number failed: 14. Number tried: 343. Number failed: 14. Number tried: 344. Number failed: 14. Number tried: 345. Number failed: 14. Number tried: 346. Number failed: 14. Number tried: 347. Number failed: 14. Number tried: 348. Number failed: 14. Number tried: 349. Number failed: 14. Number tried: 350. Number failed: 14. Number tried: 351. Number failed: 14. Number tried: 352. Number failed: 14. Number tried: 353. Number failed: 14. Number tried: 354. Number failed: 14. Number tried: 355. Number failed: 14. Number tried: 356. Number failed: 14. Number tried: 357. Number failed: 14. Number tried: 358. Number failed: 14. Number tried: 359. Number failed: 14. Number tried: 360. Number failed: 14. Number tried: 361. 
Number failed: 14. Number tried: 362. Number failed: 14. Number tried: 363. Number failed: 14. Problem getting tweet: 829374341691346946 Number tried: 364. Number failed: 15. Number tried: 365. Number failed: 15. Number tried: 366. Number failed: 15. Number tried: 367. Number failed: 15. Number tried: 368. Number failed: 15. Number tried: 369. Number failed: 15. Number tried: 370. Number failed: 15. Number tried: 371. Number failed: 15. Number tried: 372. Number failed: 15. Number tried: 373. Number failed: 15. Number tried: 374. Number failed: 15. Number tried: 375. Number failed: 15. Number tried: 376. Number failed: 15. Number tried: 377. Number failed: 15. Number tried: 378. Number failed: 15. Number tried: 379. Number failed: 15. Number tried: 380. Number failed: 15. Number tried: 381. Number failed: 15. Number tried: 382. Number failed: 15. Problem getting tweet: 827228250799742977 Number tried: 383. Number failed: 16. Number tried: 384. Number failed: 16. Number tried: 385. Number failed: 16. Number tried: 386. Number failed: 16. Number tried: 387. Number failed: 16. Number tried: 388. Number failed: 16. Number tried: 389. Number failed: 16. Number tried: 390. Number failed: 16. Number tried: 391. Number failed: 16. Number tried: 392. Number failed: 16. Number tried: 393. Number failed: 16. Number tried: 394. Number failed: 16. Number tried: 395. Number failed: 16. Number tried: 396. Number failed: 16. Number tried: 397. Number failed: 16. Number tried: 398. Number failed: 16. Number tried: 399. Number failed: 16. Number tried: 400. Number failed: 16. Number tried: 401. Number failed: 16. Number tried: 402. Number failed: 16. Number tried: 403. Number failed: 16. Number tried: 404. Number failed: 16. Number tried: 405. Number failed: 16. Number tried: 406. Number failed: 16. Number tried: 407. Number failed: 16. Number tried: 408. Number failed: 16. Number tried: 409. Number failed: 16. Number tried: 410. Number failed: 16. Number tried: 411. 
Number failed: 16. Number tried: 412. Number failed: 16. Number tried: 413. Number failed: 16. Number tried: 414. Number failed: 16. Number tried: 415. Number failed: 16. Number tried: 416. Number failed: 16. Number tried: 417. Number failed: 16. Number tried: 418. Number failed: 16. Number tried: 419. Number failed: 16. Number tried: 420. Number failed: 16. Number tried: 421. Number failed: 16. Number tried: 422. Number failed: 16. Number tried: 423. Number failed: 16. Number tried: 424. Number failed: 16. Number tried: 425. Number failed: 16. Number tried: 426. Number failed: 16. Number tried: 427. Number failed: 16. Number tried: 428. Number failed: 16. Number tried: 429. Number failed: 16. Number tried: 430. Number failed: 16. Number tried: 431. Number failed: 16. Number tried: 432. Number failed: 16. Number tried: 433. Number failed: 16. Number tried: 434. Number failed: 16. Number tried: 435. Number failed: 16. Number tried: 436. Number failed: 16. Number tried: 437. Number failed: 16. Number tried: 438. Number failed: 16. Number tried: 439. Number failed: 16. Number tried: 440. Number failed: 16. Number tried: 441. Number failed: 16. Number tried: 442. Number failed: 16. Number tried: 443. Number failed: 16. Number tried: 444. Number failed: 16. Number tried: 445. Number failed: 16. Number tried: 446. Number failed: 16. Number tried: 447. Number failed: 16. Number tried: 448. Number failed: 16. Number tried: 449. Number failed: 16. Number tried: 450. Number failed: 16. Number tried: 451. Number failed: 16. Number tried: 452. Number failed: 16. Number tried: 453. Number failed: 16. Number tried: 454. Number failed: 16. Number tried: 455. Number failed: 16. Number tried: 456. Number failed: 16. Number tried: 457. Number failed: 16. Number tried: 458. Number failed: 16. Number tried: 459. Number failed: 16. Number tried: 460. Number failed: 16. Number tried: 461. Number failed: 16. Number tried: 462. Number failed: 16. Number tried: 463. Number failed: 16. 
Number tried: 464. Number failed: 16. Number tried: 465. Number failed: 16. Number tried: 466. Number failed: 16. Number tried: 467. Number failed: 16. Number tried: 468. Number failed: 16. Number tried: 469. Number failed: 16. Number tried: 470. Number failed: 16. Number tried: 471. Number failed: 16. Number tried: 472. Number failed: 16. Number tried: 473. Number failed: 16. Number tried: 474. Number failed: 16. Number tried: 475. Number failed: 16. Number tried: 476. Number failed: 16. Number tried: 477. Number failed: 16. Number tried: 478. Number failed: 16. Number tried: 479. Number failed: 16. Number tried: 480. Number failed: 16. Number tried: 481. Number failed: 16. Number tried: 482. Number failed: 16. Number tried: 483. Number failed: 16. Number tried: 484. Number failed: 16. Number tried: 485. Number failed: 16. Number tried: 486. Number failed: 16. Number tried: 487. Number failed: 16. Number tried: 488. Number failed: 16. Number tried: 489. Number failed: 16. Number tried: 490. Number failed: 16. Number tried: 491. Number failed: 16. Number tried: 492. Number failed: 16. Number tried: 493. Number failed: 16. Number tried: 494. Number failed: 16. Number tried: 495. Number failed: 16. Number tried: 496. Number failed: 16. Number tried: 497. Number failed: 16. Number tried: 498. Number failed: 16. Number tried: 499. Number failed: 16. Number tried: 500. Number failed: 16. Number tried: 501. Number failed: 16. Number tried: 502. Number failed: 16. Number tried: 503. Number failed: 16. Number tried: 504. Number failed: 16. Number tried: 505. Number failed: 16. Number tried: 506. Number failed: 16. Problem getting tweet: 812747805718642688 Number tried: 507. Number failed: 17. Number tried: 508. Number failed: 17. Number tried: 509. Number failed: 17. Number tried: 510. Number failed: 17. Number tried: 511. Number failed: 17. Number tried: 512. Number failed: 17. Number tried: 513. Number failed: 17. Number tried: 514. Number failed: 17. Number tried: 515. 
Number failed: 17. Number tried: 516. Number failed: 17. Number tried: 517. Number failed: 17. Number tried: 518. Number failed: 17. Number tried: 519. Number failed: 17. Number tried: 520. Number failed: 17. Number tried: 521. Number failed: 17. Number tried: 522. Number failed: 17. Number tried: 523. Number failed: 17. Number tried: 524. Number failed: 17. Number tried: 525. Number failed: 17. Number tried: 526. Number failed: 17. Number tried: 527. Number failed: 17. Number tried: 528. Number failed: 17. Number tried: 529. Number failed: 17. Number tried: 530. Number failed: 17. Number tried: 531. Number failed: 17. Number tried: 532. Number failed: 17. Number tried: 533. Number failed: 17. Number tried: 534. Number failed: 17. Number tried: 535. Number failed: 17. Number tried: 536. Number failed: 17. Number tried: 537. Number failed: 17. Number tried: 538. Number failed: 17. Number tried: 539. Number failed: 17. Number tried: 540. Number failed: 17. Number tried: 541. Number failed: 17. Number tried: 542. Number failed: 17. Number tried: 543. Number failed: 17. Number tried: 544. Number failed: 17. Number tried: 545. Number failed: 17. Number tried: 546. Number failed: 17. Number tried: 547. Number failed: 17. Number tried: 548. Number failed: 17. Number tried: 549. Number failed: 17. Number tried: 550. Number failed: 17. Number tried: 551. Number failed: 17. Number tried: 552. Number failed: 17. Number tried: 553. Number failed: 17. Number tried: 554. Number failed: 17. Number tried: 555. Number failed: 17. Number tried: 556. Number failed: 17. Number tried: 557. Number failed: 17. Number tried: 558. Number failed: 17. Number tried: 559. Number failed: 17. Number tried: 560. Number failed: 17. Number tried: 561. Number failed: 17. Number tried: 562. Number failed: 17. Number tried: 563. Number failed: 17. Number tried: 564. Number failed: 17. Number tried: 565. Number failed: 17. Number tried: 566. Number failed: 17. 
Problem getting tweet: 802247111496568832 Number tried: 567. Number failed: 18. Number tried: 568. Number failed: 18. Number tried: 569. Number failed: 18. Number tried: 570. Number failed: 18. Number tried: 571. Number failed: 18. Number tried: 572. Number failed: 18. Number tried: 573. Number failed: 18. Number tried: 574. Number failed: 18. Number tried: 575. Number failed: 18. Number tried: 576. Number failed: 18. Number tried: 577. Number failed: 18. Number tried: 578. Number failed: 18. Number tried: 579. Number failed: 18. Number tried: 580. Number failed: 18. Number tried: 581. Number failed: 18. Number tried: 582. Number failed: 18. Number tried: 583. Number failed: 18. Number tried: 584. Number failed: 18. Number tried: 585. Number failed: 18. Number tried: 586. Number failed: 18. Number tried: 587. Number failed: 18. Number tried: 588. Number failed: 18. Number tried: 589. Number failed: 18. Number tried: 590. Number failed: 18. Number tried: 591. Number failed: 18. Number tried: 592. Number failed: 18. Number tried: 593. Number failed: 18. Number tried: 594. Number failed: 18. Number tried: 595. Number failed: 18. Number tried: 596. Number failed: 18. Number tried: 597. Number failed: 18. Number tried: 598. Number failed: 18. Number tried: 599. Number failed: 18. Number tried: 600. Number failed: 18. Number tried: 601. Number failed: 18. Number tried: 602. Number failed: 18. Number tried: 603. Number failed: 18. Number tried: 604. Number failed: 18. Number tried: 605. Number failed: 18. Number tried: 606. Number failed: 18. Number tried: 607. Number failed: 18. Number tried: 608. Number failed: 18. Number tried: 609. Number failed: 18. Number tried: 610. Number failed: 18. Number tried: 611. Number failed: 18. Number tried: 612. Number failed: 18. Number tried: 613. Number failed: 18. Number tried: 614. Number failed: 18. Number tried: 615. Number failed: 18. Number tried: 616. Number failed: 18. Number tried: 617. Number failed: 18. Number tried: 618. 
Number failed: 18. Number tried: 619. Number failed: 18. Number tried: 620. Number failed: 18. Number tried: 621. Number failed: 18. Number tried: 622. Number failed: 18. Number tried: 623. Number failed: 18. Number tried: 624. Number failed: 18. Number tried: 625. Number failed: 18. Number tried: 626. Number failed: 18. Number tried: 627. Number failed: 18. Number tried: 628. Number failed: 18. Number tried: 629. Number failed: 18. Number tried: 630. Number failed: 18. Number tried: 631. Number failed: 18. Number tried: 632. Number failed: 18. Number tried: 633. Number failed: 18. Number tried: 634. Number failed: 18. Number tried: 635. Number failed: 18. Number tried: 636. Number failed: 18. Number tried: 637. Number failed: 18. Number tried: 638. Number failed: 18. Number tried: 639. Number failed: 18. Number tried: 640. Number failed: 18. Number tried: 641. Number failed: 18. Number tried: 642. Number failed: 18. Number tried: 643. Number failed: 18. Number tried: 644. Number failed: 18. Number tried: 645. Number failed: 18. Number tried: 646. Number failed: 18. Number tried: 647. Number failed: 18. Number tried: 648. Number failed: 18. Number tried: 649. Number failed: 18. Number tried: 650. Number failed: 18. Number tried: 651. Number failed: 18. Number tried: 652. Number failed: 18. Number tried: 653. Number failed: 18. Number tried: 654. Number failed: 18. Number tried: 655. Number failed: 18. Number tried: 656. Number failed: 18. Number tried: 657. Number failed: 18. Number tried: 658. Number failed: 18. Number tried: 659. Number failed: 18. Number tried: 660. Number failed: 18. Number tried: 661. Number failed: 18. Number tried: 662. Number failed: 18. Number tried: 663. Number failed: 18. Number tried: 664. Number failed: 18. Number tried: 665. Number failed: 18. Number tried: 666. Number failed: 18. Number tried: 667. Number failed: 18. Number tried: 668. Number failed: 18. Number tried: 669. Number failed: 18. Number tried: 670. Number failed: 18. 
Number tried: 671. Number failed: 18. Number tried: 672. Number failed: 18. Number tried: 673. Number failed: 18. Number tried: 674. Number failed: 18. Number tried: 675. Number failed: 18. Number tried: 676. Number failed: 18. Number tried: 677. Number failed: 18. Number tried: 678. Number failed: 18. Number tried: 679. Number failed: 18. Number tried: 680. Number failed: 18. Number tried: 681. Number failed: 18. Number tried: 682. Number failed: 18. Number tried: 683. Number failed: 18. Number tried: 684. Number failed: 18. Number tried: 685. Number failed: 18. Number tried: 686. Number failed: 18. Number tried: 687. Number failed: 18. Number tried: 688. Number failed: 18. Number tried: 689. Number failed: 18. Number tried: 690. Number failed: 18. Number tried: 691. Number failed: 18. Number tried: 692. Number failed: 18. Number tried: 693. Number failed: 18. Number tried: 694. Number failed: 18. Number tried: 695. Number failed: 18. Number tried: 696. Number failed: 18. Number tried: 697. Number failed: 18. Number tried: 698. Number failed: 18. Number tried: 699. Number failed: 18. Number tried: 700. Number failed: 18. Number tried: 701. Number failed: 18. Number tried: 702. Number failed: 18. Number tried: 703. Number failed: 18. Number tried: 704. Number failed: 18. Number tried: 705. Number failed: 18. Number tried: 706. Number failed: 18. Number tried: 707. Number failed: 18. Number tried: 708. Number failed: 18. Number tried: 709. Number failed: 18. Number tried: 710. Number failed: 18. Number tried: 711. Number failed: 18. Number tried: 712. Number failed: 18. Number tried: 713. Number failed: 18. Number tried: 714. Number failed: 18. Number tried: 715. Number failed: 18. Number tried: 716. Number failed: 18. Number tried: 717. Number failed: 18. Number tried: 718. Number failed: 18. Number tried: 719. Number failed: 18. Number tried: 720. Number failed: 18. Number tried: 721. Number failed: 18. Number tried: 722. Number failed: 18. Number tried: 723. 
Number failed: 18. Number tried: 724. Number failed: 18. Number tried: 725. Number failed: 18. Number tried: 726. Number failed: 18. Number tried: 727. Number failed: 18. Number tried: 728. Number failed: 18. Number tried: 729. Number failed: 18. Number tried: 730. Number failed: 18. Number tried: 731. Number failed: 18. Number tried: 732. Number failed: 18. Number tried: 733. Number failed: 18. Number tried: 734. Number failed: 18. Number tried: 735. Number failed: 18. Number tried: 736. Number failed: 18. Number tried: 737. Number failed: 18. Number tried: 738. Number failed: 18. Number tried: 739. Number failed: 18. Number tried: 740. Number failed: 18. Number tried: 741. Number failed: 18. Number tried: 742. Number failed: 18. Number tried: 743. Number failed: 18. Number tried: 744. Number failed: 18. Number tried: 745. Number failed: 18. Number tried: 746. Number failed: 18. Number tried: 747. Number failed: 18. Number tried: 748. Number failed: 18. Number tried: 749. Number failed: 18. Number tried: 750. Number failed: 18. Problem getting tweet: 779123168116150273 Number tried: 751. Number failed: 19. Number tried: 752. Number failed: 19. Number tried: 753. Number failed: 19. Number tried: 754. Number failed: 19. Number tried: 755. Number failed: 19. Number tried: 756. Number failed: 19. Number tried: 757. Number failed: 19. Number tried: 758. Number failed: 19. Number tried: 759. Number failed: 19. Number tried: 760. Number failed: 19. Number tried: 761. Number failed: 19. Number tried: 762. Number failed: 19. Number tried: 763. Number failed: 19. Number tried: 764. Number failed: 19. Number tried: 765. Number failed: 19. Number tried: 766. Number failed: 19. Number tried: 767. Number failed: 19. Number tried: 768. Number failed: 19. Number tried: 769. Number failed: 19. Number tried: 770. Number failed: 19. Number tried: 771. Number failed: 19. Number tried: 772. Number failed: 19. Number tried: 773. Number failed: 19. Number tried: 774. Number failed: 19. 
Number tried: 775. Number failed: 19. Number tried: 776. Number failed: 19. Number tried: 777. Number failed: 19. Number tried: 778. Number failed: 19. Number tried: 779. Number failed: 19. Number tried: 780. Number failed: 19. Number tried: 781. Number failed: 19. Number tried: 782. Number failed: 19. Number tried: 783. Number failed: 19. Number tried: 784. Number failed: 19. Problem getting tweet: 775096608509886464 Number tried: 785. Number failed: 20. Number tried: 786. Number failed: 20. Number tried: 787. Number failed: 20. Number tried: 788. Number failed: 20. Number tried: 789. Number failed: 20. Number tried: 790. Number failed: 20. Number tried: 791. Number failed: 20. Number tried: 792. Number failed: 20. Number tried: 793. Number failed: 20. Number tried: 794. Number failed: 20. Number tried: 795. Number failed: 20. Number tried: 796. Number failed: 20. Number tried: 797. Number failed: 20. Number tried: 798. Number failed: 20. Number tried: 799. Number failed: 20. Number tried: 800. Number failed: 20. Number tried: 801. Number failed: 20. Number tried: 802. Number failed: 20. Number tried: 803. Number failed: 20. Number tried: 804. Number failed: 20. Number tried: 805. Number failed: 20. Number tried: 806. Number failed: 20. Number tried: 807. Number failed: 20. Number tried: 808. Number failed: 20. Number tried: 809. Number failed: 20. Number tried: 810. Number failed: 20. Number tried: 811. Number failed: 20. Number tried: 812. Number failed: 20. Number tried: 813. Number failed: 20. Number tried: 814. Number failed: 20. Number tried: 815. Number failed: 20. Problem getting tweet: 771004394259247104 Number tried: 816. Number failed: 21. Number tried: 817. Number failed: 21. Number tried: 818. Number failed: 21. Problem getting tweet: 770743923962707968 Number tried: 819. Number failed: 22. Number tried: 820. Number failed: 22. Number tried: 821. Number failed: 22. Number tried: 822. Number failed: 22. Number tried: 823. Number failed: 22. 
Number tried: 824. Number failed: 22. Number tried: 825. Number failed: 22. Number tried: 826. Number failed: 22. Number tried: 827. Number failed: 22. Number tried: 828. Number failed: 22. Number tried: 829. Number failed: 22. Number tried: 830. Number failed: 22. Number tried: 831. Number failed: 22. Number tried: 832. Number failed: 22. Number tried: 833. Number failed: 22. Number tried: 834. Number failed: 22. Number tried: 835. Number failed: 22. Number tried: 836. Number failed: 22. Number tried: 837. Number failed: 22. Number tried: 838. Number failed: 22. Number tried: 839. Number failed: 22. Number tried: 840. Number failed: 22. Number tried: 841. Number failed: 22. Number tried: 842. Number failed: 22. Number tried: 843. Number failed: 22. Number tried: 844. Number failed: 22. Number tried: 845. Number failed: 22. Number tried: 846. Number failed: 22. Number tried: 847. Number failed: 22. Number tried: 848. Number failed: 22. Number tried: 849. Number failed: 22. Number tried: 850. Number failed: 22. Number tried: 851. Number failed: 22. Number tried: 852. Number failed: 22. Number tried: 853. Number failed: 22. Number tried: 854. Number failed: 22. Number tried: 855. Number failed: 22. Number tried: 856. Number failed: 22. Number tried: 857. Number failed: 22. Number tried: 858. Number failed: 22. Number tried: 859. Number failed: 22. Number tried: 860. Number failed: 22. Number tried: 861. Number failed: 22. Number tried: 862. Number failed: 22. Number tried: 863. Number failed: 22. Number tried: 864. Number failed: 22. Number tried: 865. Number failed: 22. Number tried: 866. Number failed: 22. Number tried: 867. Number failed: 22. Number tried: 868. Number failed: 22. Number tried: 869. Number failed: 22. Number tried: 870. Number failed: 22. Number tried: 871. Number failed: 22. Number tried: 872. Number failed: 22. Number tried: 873. Number failed: 22. Number tried: 874. Number failed: 22. Number tried: 875. Number failed: 22. Number tried: 876. 
Number failed: 22. Number tried: 877. Number failed: 22. Number tried: 878. Number failed: 22. Number tried: 879. Number failed: 22. Number tried: 880. Number failed: 22. Number tried: 881. Number failed: 22. Number tried: 882. Number failed: 22. Number tried: 883. Number failed: 22. Number tried: 884. Number failed: 22. Number tried: 885. Number failed: 22. Number tried: 886. Number failed: 22. Number tried: 887. Number failed: 22. Number tried: 888. Number failed: 22. Number tried: 889. Number failed: 22. Number tried: 890. Number failed: 22. Problem getting tweet: 759566828574212096 Number tried: 891. Number failed: 23. Number tried: 892. Number failed: 23. Number tried: 893. Number failed: 23. Number tried: 894. Number failed: 23. Number tried: 895. Number failed: 23. Number tried: 896. Number failed: 23. Number tried: 897. Number failed: 23. Number tried: 898. Number failed: 23. Number tried: 899. Number failed: 23. Number tried: 900. Number failed: 23. Number tried: 901. Number failed: 23. Number tried: 902. Number failed: 23. Number tried: 903. Number failed: 23. Number tried: 904. Number failed: 23. Number tried: 905. Number failed: 23. Number tried: 906. Number failed: 23. Number tried: 907. Number failed: 23. Number tried: 908. Number failed: 23. Number tried: 909. Number failed: 23. Number tried: 910. Number failed: 23. Number tried: 911. Number failed: 23. Number tried: 912. Number failed: 23. Number tried: 913. Number failed: 23. Number tried: 914. Number failed: 23. Number tried: 915. Number failed: 23. Number tried: 916. Number failed: 23. Number tried: 917. Number failed: 23. Number tried: 918. Number failed: 23. Number tried: 919. Number failed: 23. Number tried: 920. Number failed: 23. Number tried: 921. Number failed: 23. Number tried: 922. Number failed: 23. Number tried: 923. Number failed: 23. Number tried: 924. Number failed: 23. Number tried: 925. Number failed: 23. Number tried: 926. Number failed: 23. Number tried: 927. Number failed: 23. 
Number tried: 928. Number failed: 23. Number tried: 929. Number failed: 23. Number tried: 930. Number failed: 23. Number tried: 931. Number failed: 23. Number tried: 932. Number failed: 23. Problem getting tweet: 754011816964026368 Number tried: 933. Number failed: 24. Number tried: 934. Number failed: 24. Number tried: 935. Number failed: 24. Number tried: 936. Number failed: 24. Number tried: 937. Number failed: 24. Number tried: 938. Number failed: 24. Number tried: 939. Number failed: 24. Number tried: 940. Number failed: 24. Number tried: 941. Number failed: 24. Number tried: 942. Number failed: 24. Number tried: 943. Number failed: 24. Number tried: 944. Number failed: 24. Number tried: 945. Number failed: 24. Number tried: 946. Number failed: 24. Number tried: 947. Number failed: 24. Number tried: 948. Number failed: 24. Number tried: 949. Number failed: 24. Number tried: 950. Number failed: 24. Number tried: 951. Number failed: 24. Number tried: 952. Number failed: 24. Number tried: 953. Number failed: 24. Number tried: 954. Number failed: 24. Number tried: 955. Number failed: 24. Number tried: 956. Number failed: 24. Number tried: 957. Number failed: 24. Number tried: 958. Number failed: 24. Number tried: 959. Number failed: 24. Number tried: 960. Number failed: 24. Number tried: 961. Number failed: 24. Number tried: 962. Number failed: 24. Number tried: 963. Number failed: 24. Number tried: 964. Number failed: 24. Number tried: 965. Number failed: 24. Number tried: 966. Number failed: 24. Number tried: 967. Number failed: 24. Number tried: 968. Number failed: 24. Number tried: 969. Number failed: 24. Number tried: 970. Number failed: 24. Number tried: 971. Number failed: 24. Number tried: 972. Number failed: 24. Number tried: 973. Number failed: 24. Number tried: 974. Number failed: 24. Number tried: 975. Number failed: 24. Number tried: 976. Number failed: 24. Number tried: 977. Number failed: 24. Number tried: 978. Number failed: 24. Number tried: 979. 
Number failed: 24. Number tried: 980. Number failed: 24. Number tried: 981. Number failed: 24. Number tried: 982. Number failed: 24. Number tried: 983. Number failed: 24. Number tried: 984. Number failed: 24. Number tried: 985. Number failed: 24. Number tried: 986. Number failed: 24. Number tried: 987. Number failed: 24. Number tried: 988. Number failed: 24. Number tried: 989. Number failed: 24. Number tried: 990. Number failed: 24. Number tried: 991. Number failed: 24. Number tried: 992. Number failed: 24. Number tried: 993. Number failed: 24. Number tried: 994. Number failed: 24. Number tried: 995. Number failed: 24. Number tried: 996. Number failed: 24. Number tried: 997. Number failed: 24. Number tried: 998. Number failed: 24. Number tried: 999. Number failed: 24. Number tried: 1000. Number failed: 24. Number tried: 1001. Number failed: 24. Number tried: 1002. Number failed: 24. Number tried: 1003. Number failed: 24. Number tried: 1004. Number failed: 24. Number tried: 1005. Number failed: 24. Number tried: 1006. Number failed: 24. Number tried: 1007. Number failed: 24. Number tried: 1008. Number failed: 24. Number tried: 1009. Number failed: 24. Number tried: 1010. Number failed: 24. Number tried: 1011. Number failed: 24. Number tried: 1012. Number failed: 24. Number tried: 1013. Number failed: 24. Number tried: 1014. Number failed: 24. Number tried: 1015. Number failed: 24. Number tried: 1016. Number failed: 24. Number tried: 1017. Number failed: 24. Number tried: 1018. Number failed: 24. Number tried: 1019. Number failed: 24. Number tried: 1020. Number failed: 24. Number tried: 1021. Number failed: 24. Number tried: 1022. Number failed: 24. Number tried: 1023. Number failed: 24. Number tried: 1024. Number failed: 24. Number tried: 1025. Number failed: 24. Number tried: 1026. Number failed: 24. Number tried: 1027. Number failed: 24. Number tried: 1028. Number failed: 24. Number tried: 1029. Number failed: 24. Number tried: 1030. Number failed: 24. 
Number tried: 1031. Number failed: 24. Number tried: 1032. Number failed: 24. Number tried: 1033. Number failed: 24. Number tried: 1034. Number failed: 24. Number tried: 1035. Number failed: 24. Number tried: 1036. Number failed: 24. Number tried: 1037. Number failed: 24. Number tried: 1038. Number failed: 24. Number tried: 1039. Number failed: 24. Number tried: 1040. Number failed: 24. Number tried: 1041. Number failed: 24. Number tried: 1042. Number failed: 24. Number tried: 1043. Number failed: 24. Number tried: 1044. Number failed: 24. Number tried: 1045. Number failed: 24. Number tried: 1046. Number failed: 24. Number tried: 1047. Number failed: 24. Number tried: 1048. Number failed: 24. Number tried: 1049. Number failed: 24. Number tried: 1050. Number failed: 24. Number tried: 1051. Number failed: 24. Number tried: 1052. Number failed: 24. Number tried: 1053. Number failed: 24. Number tried: 1054. Number failed: 24. Number tried: 1055. Number failed: 24. Number tried: 1056. Number failed: 24. Number tried: 1057. Number failed: 24. Number tried: 1058. Number failed: 24. Number tried: 1059. Number failed: 24. Number tried: 1060. Number failed: 24. Number tried: 1061. Number failed: 24. Number tried: 1062. Number failed: 24. Number tried: 1063. Number failed: 24. Number tried: 1064. Number failed: 24. Number tried: 1065. Number failed: 24. Number tried: 1066. Number failed: 24. Number tried: 1067. Number failed: 24. Number tried: 1068. Number failed: 24. Number tried: 1069. Number failed: 24. Number tried: 1070. Number failed: 24. Number tried: 1071. Number failed: 24. Number tried: 1072. Number failed: 24. Number tried: 1073. Number failed: 24. Number tried: 1074. Number failed: 24. Number tried: 1075. Number failed: 24. Number tried: 1076. Number failed: 24. Number tried: 1077. Number failed: 24. Number tried: 1078. Number failed: 24. Number tried: 1079. Number failed: 24. Number tried: 1080. Number failed: 24. Number tried: 1081. Number failed: 24. 
Number tried: 1082. Number failed: 24. Number tried: 1083. Number failed: 24. Number tried: 1084. Number failed: 24. Number tried: 1085. Number failed: 24. Number tried: 1086. Number failed: 24. Number tried: 1087. Number failed: 24. Number tried: 1088. Number failed: 24. Number tried: 1089. Number failed: 24. Number tried: 1090. Number failed: 24. Number tried: 1091. Number failed: 24. Number tried: 1092. Number failed: 24. Number tried: 1093. Number failed: 24. Number tried: 1094. Number failed: 24. Number tried: 1095. Number failed: 24. Number tried: 1096. Number failed: 24. Number tried: 1097. Number failed: 24. Number tried: 1098. Number failed: 24. Number tried: 1099. Number failed: 24. Number tried: 1100. Number failed: 24. Number tried: 1101. Number failed: 24. Number tried: 1102. Number failed: 24. Number tried: 1103. Number failed: 24. Number tried: 1104. Number failed: 24. Number tried: 1105. Number failed: 24. Number tried: 1106. Number failed: 24. Number tried: 1107. Number failed: 24. Number tried: 1108. Number failed: 24. Number tried: 1109. Number failed: 24. Number tried: 1110. Number failed: 24. Number tried: 1111. Number failed: 24. Number tried: 1112. Number failed: 24. Number tried: 1113. Number failed: 24. Number tried: 1114. Number failed: 24. Number tried: 1115. Number failed: 24. Number tried: 1116. Number failed: 24. Number tried: 1117. Number failed: 24. Number tried: 1118. Number failed: 24. Number tried: 1119. Number failed: 24. Number tried: 1120. Number failed: 24. Number tried: 1121. Number failed: 24. Number tried: 1122. Number failed: 24. Number tried: 1123. Number failed: 24. Number tried: 1124. Number failed: 24. Number tried: 1125. Number failed: 24. Number tried: 1126. Number failed: 24. Number tried: 1127. Number failed: 24. Number tried: 1128. Number failed: 24. Number tried: 1129. Number failed: 24. Number tried: 1130. Number failed: 24. Number tried: 1131. Number failed: 24. Number tried: 1132. Number failed: 24. 
Number tried: 1133. Number failed: 24. Number tried: 1134. Number failed: 24. Number tried: 1135. Number failed: 24. Number tried: 1136. Number failed: 24. Number tried: 1137. Number failed: 24. Number tried: 1138. Number failed: 24. Number tried: 1139. Number failed: 24. Number tried: 1140. Number failed: 24. Number tried: 1141. Number failed: 24. Number tried: 1142. Number failed: 24. Number tried: 1143. Number failed: 24. Number tried: 1144. Number failed: 24. Number tried: 1145. Number failed: 24. Number tried: 1146. Number failed: 24. Number tried: 1147. Number failed: 24. Number tried: 1148. Number failed: 24. Number tried: 1149. Number failed: 24. Number tried: 1150. Number failed: 24. Number tried: 1151. Number failed: 24. Number tried: 1152. Number failed: 24. Number tried: 1153. Number failed: 24. Number tried: 1154. Number failed: 24. Number tried: 1155. Number failed: 24. Number tried: 1156. Number failed: 24. Number tried: 1157. Number failed: 24. Number tried: 1158. Number failed: 24. Number tried: 1159. Number failed: 24. Number tried: 1160. Number failed: 24. Number tried: 1161. Number failed: 24. Number tried: 1162. Number failed: 24. Number tried: 1163. Number failed: 24. Number tried: 1164. Number failed: 24. Number tried: 1165. Number failed: 24. Number tried: 1166. Number failed: 24. Number tried: 1167. Number failed: 24. Number tried: 1168. Number failed: 24. Number tried: 1169. Number failed: 24. Number tried: 1170. Number failed: 24. Number tried: 1171. Number failed: 24. Number tried: 1172. Number failed: 24. Number tried: 1173. Number failed: 24. Number tried: 1174. Number failed: 24. Number tried: 1175. Number failed: 24. Number tried: 1176. Number failed: 24. Number tried: 1177. Number failed: 24. Number tried: 1178. Number failed: 24. Number tried: 1179. Number failed: 24. Number tried: 1180. Number failed: 24. Number tried: 1181. Number failed: 24. Number tried: 1182. Number failed: 24. Number tried: 1183. Number failed: 24. 
Number tried: 1184. Number failed: 24. Number tried: 1185. Number failed: 24. Number tried: 1186. Number failed: 24. Number tried: 1187. Number failed: 24. Number tried: 1188. Number failed: 24. Number tried: 1189. Number failed: 24. Number tried: 1190. Number failed: 24. Number tried: 1191. Number failed: 24. Number tried: 1192. Number failed: 24. Number tried: 1193. Number failed: 24. Number tried: 1194. Number failed: 24. Number tried: 1195. Number failed: 24. Number tried: 1196. Number failed: 24. Number tried: 1197. Number failed: 24. Number tried: 1198. Number failed: 24. Number tried: 1199. Number failed: 24. Number tried: 1200. Number failed: 24. Number tried: 1201. Number failed: 24. Number tried: 1202. Number failed: 24. Number tried: 1203. Number failed: 24. Number tried: 1204. Number failed: 24. Number tried: 1205. Number failed: 24. Number tried: 1206. Number failed: 24. Number tried: 1207. Number failed: 24. Number tried: 1208. Number failed: 24. Number tried: 1209. Number failed: 24. Number tried: 1210. Number failed: 24. Number tried: 1211. Number failed: 24. Number tried: 1212. Number failed: 24. Number tried: 1213. Number failed: 24. Number tried: 1214. Number failed: 24. Number tried: 1215. Number failed: 24. Number tried: 1216. Number failed: 24. Number tried: 1217. Number failed: 24. Number tried: 1218. Number failed: 24. Number tried: 1219. Number failed: 24. Number tried: 1220. Number failed: 24. Number tried: 1221. Number failed: 24. Number tried: 1222. Number failed: 24. Number tried: 1223. Number failed: 24. Number tried: 1224. Number failed: 24. Number tried: 1225. Number failed: 24. Number tried: 1226. Number failed: 24. Number tried: 1227. Number failed: 24. Number tried: 1228. Number failed: 24. Number tried: 1229. Number failed: 24. Number tried: 1230. Number failed: 24. Number tried: 1231. Number failed: 24. Number tried: 1232. Number failed: 24. Number tried: 1233. Number failed: 24. Number tried: 1234. Number failed: 24. 
Number tried: 1235. Number failed: 24. Number tried: 1236. Number failed: 24. Number tried: 1237. Number failed: 24. Number tried: 1238. Number failed: 24. Number tried: 1239. Number failed: 24. Number tried: 1240. Number failed: 24. Number tried: 1241. Number failed: 24. Number tried: 1242. Number failed: 24. Number tried: 1243. Number failed: 24. Number tried: 1244. Number failed: 24. Number tried: 1245. Number failed: 24. Number tried: 1246. Number failed: 24. Number tried: 1247. Number failed: 24. Number tried: 1248. Number failed: 24. Number tried: 1249. Number failed: 24. Number tried: 1250. Number failed: 24. Number tried: 1251. Number failed: 24. Number tried: 1252. Number failed: 24. Number tried: 1253. Number failed: 24. Number tried: 1254. Number failed: 24. Number tried: 1255. Number failed: 24. Number tried: 1256. Number failed: 24. Number tried: 1257. Number failed: 24. Number tried: 1258. Number failed: 24. Number tried: 1259. Number failed: 24. Number tried: 1260. Number failed: 24. Number tried: 1261. Number failed: 24. Number tried: 1262. Number failed: 24. Number tried: 1263. Number failed: 24. Number tried: 1264. Number failed: 24. Number tried: 1265. Number failed: 24. Number tried: 1266. Number failed: 24. Number tried: 1267. Number failed: 24. Number tried: 1268. Number failed: 24. Number tried: 1269. Number failed: 24. Number tried: 1270. Number failed: 24. Number tried: 1271. Number failed: 24. Number tried: 1272. Number failed: 24. Number tried: 1273. Number failed: 24. Number tried: 1274. Number failed: 24. Number tried: 1275. Number failed: 24. Number tried: 1276. Number failed: 24. Number tried: 1277. Number failed: 24. Number tried: 1278. Number failed: 24. Number tried: 1279. Number failed: 24. Number tried: 1280. Number failed: 24. Number tried: 1281. Number failed: 24. Number tried: 1282. Number failed: 24. Number tried: 1283. Number failed: 24. Number tried: 1284. Number failed: 24. Number tried: 1285. Number failed: 24. 
Number tried: 1286. Number failed: 24. Number tried: 1287. Number failed: 24. Number tried: 1288. Number failed: 24. Number tried: 1289. Number failed: 24. Number tried: 1290. Number failed: 24. Number tried: 1291. Number failed: 24. Number tried: 1292. Number failed: 24. Number tried: 1293. Number failed: 24. Number tried: 1294. Number failed: 24. Number tried: 1295. Number failed: 24. Number tried: 1296. Number failed: 24. Number tried: 1297. Number failed: 24. Number tried: 1298. Number failed: 24. Number tried: 1299. Number failed: 24. Number tried: 1300. Number failed: 24. Number tried: 1301. Number failed: 24. Number tried: 1302. Number failed: 24. Number tried: 1303. Number failed: 24. Number tried: 1304. Number failed: 24. Number tried: 1305. Number failed: 24. Number tried: 1306. Number failed: 24. Number tried: 1307. Number failed: 24. Number tried: 1308. Number failed: 24. Number tried: 1309. Number failed: 24. Number tried: 1310. Number failed: 24. Number tried: 1311. Number failed: 24. Number tried: 1312. Number failed: 24. Number tried: 1313. Number failed: 24. Number tried: 1314. Number failed: 24. Number tried: 1315. Number failed: 24. Number tried: 1316. Number failed: 24. Number tried: 1317. Number failed: 24. Number tried: 1318. Number failed: 24. Number tried: 1319. Number failed: 24. Number tried: 1320. Number failed: 24. Number tried: 1321. Number failed: 24. Number tried: 1322. Number failed: 24. Number tried: 1323. Number failed: 24. Number tried: 1324. Number failed: 24. Number tried: 1325. Number failed: 24. Number tried: 1326. Number failed: 24. Number tried: 1327. Number failed: 24. Number tried: 1328. Number failed: 24. Number tried: 1329. Number failed: 24. Number tried: 1330. Number failed: 24. Number tried: 1331. Number failed: 24. Number tried: 1332. Number failed: 24. Number tried: 1333. Number failed: 24. Number tried: 1334. Number failed: 24. Number tried: 1335. Number failed: 24. Number tried: 1336. Number failed: 24. 
Number tried: 1337. Number failed: 24. Number tried: 1338. Number failed: 24. Number tried: 1339. Number failed: 24. Number tried: 1340. Number failed: 24. Number tried: 1341. Number failed: 24. Number tried: 1342. Number failed: 24. Number tried: 1343. Number failed: 24. Number tried: 1344. Number failed: 24. Number tried: 1345. Number failed: 24. Number tried: 1346. Number failed: 24. Number tried: 1347. Number failed: 24. Number tried: 1348. Number failed: 24. Number tried: 1349. Number failed: 24. Number tried: 1350. Number failed: 24. Number tried: 1351. Number failed: 24. Number tried: 1352. Number failed: 24. Number tried: 1353. Number failed: 24. Number tried: 1354. Number failed: 24. Number tried: 1355. Number failed: 24. Number tried: 1356. Number failed: 24. Number tried: 1357. Number failed: 24. Number tried: 1358. Number failed: 24. Number tried: 1359. Number failed: 24. Number tried: 1360. Number failed: 24. Number tried: 1361. Number failed: 24. Number tried: 1362. Number failed: 24. Number tried: 1363. Number failed: 24. Number tried: 1364. Number failed: 24. Number tried: 1365. Number failed: 24. Number tried: 1366. Number failed: 24. Number tried: 1367. Number failed: 24. Number tried: 1368. Number failed: 24. Number tried: 1369. Number failed: 24. Number tried: 1370. Number failed: 24. Number tried: 1371. Number failed: 24. Number tried: 1372. Number failed: 24. Number tried: 1373. Number failed: 24. Number tried: 1374. Number failed: 24. Number tried: 1375. Number failed: 24. Number tried: 1376. Number failed: 24. Number tried: 1377. Number failed: 24. Number tried: 1378. Number failed: 24. Number tried: 1379. Number failed: 24. Number tried: 1380. Number failed: 24. Number tried: 1381. Number failed: 24. Number tried: 1382. Number failed: 24. Number tried: 1383. Number failed: 24. Number tried: 1384. Number failed: 24. Number tried: 1385. Number failed: 24. Number tried: 1386. Number failed: 24. Number tried: 1387. Number failed: 24. 
Number tried: 1388. Number failed: 24. Number tried: 1389. Number failed: 24. Number tried: 1390. Number failed: 24. Number tried: 1391. Number failed: 24. Number tried: 1392. Number failed: 24. Number tried: 1393. Number failed: 24. Number tried: 1394. Number failed: 24. Number tried: 1395. Number failed: 24. Number tried: 1396. Number failed: 24. Number tried: 1397. Number failed: 24. Number tried: 1398. Number failed: 24. Number tried: 1399. Number failed: 24. Number tried: 1400. Number failed: 24. Number tried: 1401. Number failed: 24. Number tried: 1402. Number failed: 24. Number tried: 1403. Number failed: 24. Number tried: 1404. Number failed: 24. Number tried: 1405. Number failed: 24. Number tried: 1406. Number failed: 24. Number tried: 1407. Number failed: 24. Number tried: 1408. Number failed: 24. Number tried: 1409. Number failed: 24. Number tried: 1410. Number failed: 24. Number tried: 1411. Number failed: 24. Number tried: 1412. Number failed: 24. Number tried: 1413. Number failed: 24. Number tried: 1414. Number failed: 24. Number tried: 1415. Number failed: 24. Number tried: 1416. Number failed: 24. Number tried: 1417. Number failed: 24. Number tried: 1418. Number failed: 24. Number tried: 1419. Number failed: 24. Number tried: 1420. Number failed: 24. Number tried: 1421. Number failed: 24. Number tried: 1422. Number failed: 24. Number tried: 1423. Number failed: 24. Number tried: 1424. Number failed: 24. Number tried: 1425. Number failed: 24. Number tried: 1426. Number failed: 24. Number tried: 1427. Number failed: 24. Number tried: 1428. Number failed: 24. Number tried: 1429. Number failed: 24. Number tried: 1430. Number failed: 24. Number tried: 1431. Number failed: 24. Number tried: 1432. Number failed: 24. Number tried: 1433. Number failed: 24. Number tried: 1434. Number failed: 24. Number tried: 1435. Number failed: 24. Number tried: 1436. Number failed: 24. Number tried: 1437. Number failed: 24. Number tried: 1438. Number failed: 24. 
Number tried: 1439. Number failed: 24. Number tried: 1440. Number failed: 24. Number tried: 1441. Number failed: 24. Number tried: 1442. Number failed: 24. Number tried: 1443. Number failed: 24. Number tried: 1444. Number failed: 24. Number tried: 1445. Number failed: 24. Number tried: 1446. Number failed: 24. Number tried: 1447. Number failed: 24. Number tried: 1448. Number failed: 24. Number tried: 1449. Number failed: 24. Number tried: 1450. Number failed: 24. Number tried: 1451. Number failed: 24. Number tried: 1452. Number failed: 24. Number tried: 1453. Number failed: 24. Number tried: 1454. Number failed: 24. Number tried: 1455. Number failed: 24. Number tried: 1456. Number failed: 24. Number tried: 1457. Number failed: 24. Number tried: 1458. Number failed: 24. Number tried: 1459. Number failed: 24. Number tried: 1460. Number failed: 24. Number tried: 1461. Number failed: 24. Number tried: 1462. Number failed: 24. Number tried: 1463. Number failed: 24. Number tried: 1464. Number failed: 24. Number tried: 1465. Number failed: 24. Number tried: 1466. Number failed: 24. Number tried: 1467. Number failed: 24. Number tried: 1468. Number failed: 24. Number tried: 1469. Number failed: 24. Number tried: 1470. Number failed: 24. Number tried: 1471. Number failed: 24. Number tried: 1472. Number failed: 24. Number tried: 1473. Number failed: 24. Number tried: 1474. Number failed: 24. Number tried: 1475. Number failed: 24. Number tried: 1476. Number failed: 24. Number tried: 1477. Number failed: 24. Number tried: 1478. Number failed: 24. Number tried: 1479. Number failed: 24. Number tried: 1480. Number failed: 24. Number tried: 1481. Number failed: 24. Number tried: 1482. Number failed: 24. Number tried: 1483. Number failed: 24. Number tried: 1484. Number failed: 24. Number tried: 1485. Number failed: 24. Number tried: 1486. Number failed: 24. Number tried: 1487. Number failed: 24. Number tried: 1488. Number failed: 24. Number tried: 1489. Number failed: 24. 
Number tried: 1490. Number failed: 24. Number tried: 1491. Number failed: 24. Number tried: 1492. Number failed: 24. Number tried: 1493. Number failed: 24. Number tried: 1494. Number failed: 24. Number tried: 1495. Number failed: 24. Number tried: 1496. Number failed: 24. Number tried: 1497. Number failed: 24. Number tried: 1498. Number failed: 24. Number tried: 1499. Number failed: 24. Number tried: 1500. Number failed: 24. Number tried: 1501. Number failed: 24. Number tried: 1502. Number failed: 24. Number tried: 1503. Number failed: 24. Number tried: 1504. Number failed: 24. Number tried: 1505. Number failed: 24. Number tried: 1506. Number failed: 24. Number tried: 1507. Number failed: 24. Number tried: 1508. Number failed: 24. Number tried: 1509. Number failed: 24. Number tried: 1510. Number failed: 24. Number tried: 1511. Number failed: 24. Number tried: 1512. Number failed: 24. Number tried: 1513. Number failed: 24. Number tried: 1514. Number failed: 24. Number tried: 1515. Number failed: 24. Number tried: 1516. Number failed: 24. Number tried: 1517. Number failed: 24. Number tried: 1518. Number failed: 24. Number tried: 1519. Number failed: 24. Number tried: 1520. Number failed: 24. Number tried: 1521. Number failed: 24. Number tried: 1522. Number failed: 24. Number tried: 1523. Number failed: 24. Number tried: 1524. Number failed: 24. Number tried: 1525. Number failed: 24. Number tried: 1526. Number failed: 24. Number tried: 1527. Number failed: 24. Number tried: 1528. Number failed: 24. Number tried: 1529. Number failed: 24. Number tried: 1530. Number failed: 24. Number tried: 1531. Number failed: 24. Number tried: 1532. Number failed: 24. Number tried: 1533. Number failed: 24. Number tried: 1534. Number failed: 24. Number tried: 1535. Number failed: 24. Number tried: 1536. Number failed: 24. Number tried: 1537. Number failed: 24. Number tried: 1538. Number failed: 24. Number tried: 1539. Number failed: 24. Number tried: 1540. Number failed: 24. 
Number tried: 1541. Number failed: 24. Number tried: 1542. Number failed: 24. Number tried: 1543. Number failed: 24. Number tried: 1544. Number failed: 24. Number tried: 1545. Number failed: 24. Number tried: 1546. Number failed: 24. Number tried: 1547. Number failed: 24. Number tried: 1548. Number failed: 24. Number tried: 1549. Number failed: 24. Number tried: 1550. Number failed: 24. Number tried: 1551. Number failed: 24. Number tried: 1552. Number failed: 24. Number tried: 1553. Number failed: 24. Number tried: 1554. Number failed: 24. Number tried: 1555. Number failed: 24. Number tried: 1556. Number failed: 24. Number tried: 1557. Number failed: 24. Number tried: 1558. Number failed: 24. Number tried: 1559. Number failed: 24. Number tried: 1560. Number failed: 24. Number tried: 1561. Number failed: 24. Number tried: 1562. Number failed: 24. Number tried: 1563. Number failed: 24. Number tried: 1564. Number failed: 24. Number tried: 1565. Number failed: 24. Number tried: 1566. Number failed: 24. Number tried: 1567. Number failed: 24. Number tried: 1568. Number failed: 24. Number tried: 1569. Number failed: 24. Number tried: 1570. Number failed: 24. Number tried: 1571. Number failed: 24. Number tried: 1572. Number failed: 24. Number tried: 1573. Number failed: 24. Number tried: 1574. Number failed: 24. Number tried: 1575. Number failed: 24. Number tried: 1576. Number failed: 24. Number tried: 1577. Number failed: 24. Number tried: 1578. Number failed: 24. Number tried: 1579. Number failed: 24. Number tried: 1580. Number failed: 24. Number tried: 1581. Number failed: 24. Number tried: 1582. Number failed: 24. Number tried: 1583. Number failed: 24. Number tried: 1584. Number failed: 24. Number tried: 1585. Number failed: 24. Number tried: 1586. Number failed: 24. Number tried: 1587. Number failed: 24. Number tried: 1588. Number failed: 24. Number tried: 1589. Number failed: 24. Number tried: 1590. Number failed: 24. Number tried: 1591. Number failed: 24. 
Number tried: 1592. Number failed: 24. Number tried: 1593. Number failed: 24. Number tried: 1594. Number failed: 24. Number tried: 1595. Number failed: 24. Number tried: 1596. Number failed: 24. Number tried: 1597. Number failed: 24. Number tried: 1598. Number failed: 24. Number tried: 1599. Number failed: 24. Number tried: 1600. Number failed: 24. Number tried: 1601. Number failed: 24. Number tried: 1602. Number failed: 24. Number tried: 1603. Number failed: 24. Number tried: 1604. Number failed: 24. Number tried: 1605. Number failed: 24. Number tried: 1606. Number failed: 24. Number tried: 1607. Number failed: 24. Number tried: 1608. Number failed: 24. Number tried: 1609. Number failed: 24. Number tried: 1610. Number failed: 24. Number tried: 1611. Number failed: 24. Number tried: 1612. Number failed: 24. Number tried: 1613. Number failed: 24. Number tried: 1614. Number failed: 24. Number tried: 1615. Number failed: 24. Number tried: 1616. Number failed: 24. Number tried: 1617. Number failed: 24. Number tried: 1618. Number failed: 24. Number tried: 1619. Number failed: 24. Number tried: 1620. Number failed: 24. Number tried: 1621. Number failed: 24. Number tried: 1622. Number failed: 24. Number tried: 1623. Number failed: 24. Number tried: 1624. Number failed: 24. Number tried: 1625. Number failed: 24. Number tried: 1626. Number failed: 24. Number tried: 1627. Number failed: 24. Number tried: 1628. Number failed: 24. Number tried: 1629. Number failed: 24. Number tried: 1630. Number failed: 24. Number tried: 1631. Number failed: 24. Number tried: 1632. Number failed: 24. Number tried: 1633. Number failed: 24. Number tried: 1634. Number failed: 24. Number tried: 1635. Number failed: 24. Number tried: 1636. Number failed: 24. Number tried: 1637. Number failed: 24. Number tried: 1638. Number failed: 24. Number tried: 1639. Number failed: 24. Number tried: 1640. Number failed: 24. Number tried: 1641. Number failed: 24. Number tried: 1642. Number failed: 24. 
Number tried: 1643. Number failed: 24. Number tried: 1644. Number failed: 24. Number tried: 1645. Number failed: 24. Number tried: 1646. Number failed: 24. Number tried: 1647. Number failed: 24. Number tried: 1648. Number failed: 24. Number tried: 1649. Number failed: 24. Number tried: 1650. Number failed: 24. Number tried: 1651. Number failed: 24. Number tried: 1652. Number failed: 24. Number tried: 1653. Number failed: 24. Number tried: 1654. Number failed: 24. Number tried: 1655. Number failed: 24. Number tried: 1656. Number failed: 24. Number tried: 1657. Number failed: 24. Number tried: 1658. Number failed: 24. Number tried: 1659. Number failed: 24. Number tried: 1660. Number failed: 24. Number tried: 1661. Number failed: 24. Number tried: 1662. Number failed: 24. Number tried: 1663. Number failed: 24. Number tried: 1664. Number failed: 24. Number tried: 1665. Number failed: 24. Number tried: 1666. Number failed: 24. Number tried: 1667. Number failed: 24. Number tried: 1668. Number failed: 24. Number tried: 1669. Number failed: 24. Number tried: 1670. Number failed: 24. Number tried: 1671. Number failed: 24. Number tried: 1672. Number failed: 24. Number tried: 1673. Number failed: 24. Number tried: 1674. Number failed: 24. Number tried: 1675. Number failed: 24. Number tried: 1676. Number failed: 24. Number tried: 1677. Number failed: 24. Number tried: 1678. Number failed: 24. Number tried: 1679. Number failed: 24. Number tried: 1680. Number failed: 24. Number tried: 1681. Number failed: 24. Number tried: 1682. Number failed: 24. Number tried: 1683. Number failed: 24. Number tried: 1684. Number failed: 24. Number tried: 1685. Number failed: 24. Number tried: 1686. Number failed: 24. Number tried: 1687. Number failed: 24. Number tried: 1688. Number failed: 24. Number tried: 1689. Number failed: 24. Number tried: 1690. Number failed: 24. Number tried: 1691. Number failed: 24. Number tried: 1692. Number failed: 24. Number tried: 1693. Number failed: 24. 
Number tried: 1694. Number failed: 24. Number tried: 1695. Number failed: 24. Number tried: 1696. Number failed: 24. Number tried: 1697. Number failed: 24. Number tried: 1698. Number failed: 24. Number tried: 1699. Number failed: 24. Number tried: 1700. Number failed: 24. Number tried: 1701. Number failed: 24. Number tried: 1702. Number failed: 24. Number tried: 1703. Number failed: 24. Number tried: 1704. Number failed: 24. Number tried: 1705. Number failed: 24. Number tried: 1706. Number failed: 24. Number tried: 1707. Number failed: 24. Number tried: 1708. Number failed: 24. Number tried: 1709. Number failed: 24. Number tried: 1710. Number failed: 24. Number tried: 1711. Number failed: 24. Number tried: 1712. Number failed: 24. Number tried: 1713. Number failed: 24. Number tried: 1714. Number failed: 24. Number tried: 1715. Number failed: 24. Number tried: 1716. Number failed: 24. Number tried: 1717. Number failed: 24. Number tried: 1718. Number failed: 24. Number tried: 1719. Number failed: 24. Number tried: 1720. Number failed: 24. Number tried: 1721. Number failed: 24. Number tried: 1722. Number failed: 24. Number tried: 1723. Number failed: 24. Number tried: 1724. Number failed: 24. Number tried: 1725. Number failed: 24. Number tried: 1726. Number failed: 24. Problem getting tweet: 680055455951884288 Number tried: 1727. Number failed: 25. Number tried: 1728. Number failed: 25. Number tried: 1729. Number failed: 25. Number tried: 1730. Number failed: 25. Number tried: 1731. Number failed: 25. Number tried: 1732. Number failed: 25. Number tried: 1733. Number failed: 25. Number tried: 1734. Number failed: 25. Number tried: 1735. Number failed: 25. Number tried: 1736. Number failed: 25. Number tried: 1737. Number failed: 25. Number tried: 1738. Number failed: 25. Number tried: 1739. Number failed: 25. Number tried: 1740. Number failed: 25. Number tried: 1741. Number failed: 25. Number tried: 1742. Number failed: 25. Number tried: 1743. Number failed: 25. 
Number tried: 1744. Number failed: 25. Number tried: 1745. Number failed: 25. Number tried: 1746. Number failed: 25. Number tried: 1747. Number failed: 25. Number tried: 1748. Number failed: 25. Number tried: 1749. Number failed: 25. Number tried: 1750. Number failed: 25. Number tried: 1751. Number failed: 25. Number tried: 1752. Number failed: 25. Number tried: 1753. Number failed: 25. Number tried: 1754. Number failed: 25. Number tried: 1755. Number failed: 25. Number tried: 1756. Number failed: 25. Number tried: 1757. Number failed: 25. Number tried: 1758. Number failed: 25. Number tried: 1759. Number failed: 25. Number tried: 1760. Number failed: 25. Number tried: 1761. Number failed: 25. Number tried: 1762. Number failed: 25. Number tried: 1763. Number failed: 25. Number tried: 1764. Number failed: 25. Number tried: 1765. Number failed: 25. Number tried: 1766. Number failed: 25. Number tried: 1767. Number failed: 25. Number tried: 1768. Number failed: 25. Number tried: 1769. Number failed: 25. Number tried: 1770. Number failed: 25. Number tried: 1771. Number failed: 25. Number tried: 1772. Number failed: 25. Number tried: 1773. Number failed: 25. Number tried: 1774. Number failed: 25. Number tried: 1775. Number failed: 25. Number tried: 1776. Number failed: 25. Number tried: 1777. Number failed: 25. Number tried: 1778. Number failed: 25. Number tried: 1779. Number failed: 25. Number tried: 1780. Number failed: 25. Number tried: 1781. Number failed: 25. Number tried: 1782. Number failed: 25. Number tried: 1783. Number failed: 25. Number tried: 1784. Number failed: 25. Number tried: 1785. Number failed: 25. Number tried: 1786. Number failed: 25. Number tried: 1787. Number failed: 25. Number tried: 1788. Number failed: 25. Number tried: 1789. Number failed: 25. Number tried: 1790. Number failed: 25. Number tried: 1791. Number failed: 25. Number tried: 1792. Number failed: 25. Number tried: 1793. Number failed: 25. Number tried: 1794. Number failed: 25. 
Number tried: 1795. Number failed: 25. Number tried: 1796. Number failed: 25. Number tried: 1797. Number failed: 25. Number tried: 1798. Number failed: 25. Number tried: 1799. Number failed: 25. Number tried: 1800. Number failed: 25. Number tried: 1801. Number failed: 25. Number tried: 1802. Number failed: 25. Number tried: 1803. Number failed: 25. Number tried: 1804. Number failed: 25. Number tried: 1805. Number failed: 25. Number tried: 1806. Number failed: 25. Number tried: 1807. Number failed: 25. Number tried: 1808. Number failed: 25. Number tried: 1809. Number failed: 25. Number tried: 1810. Number failed: 25. Number tried: 1811. Number failed: 25. Number tried: 1812. Number failed: 25. Number tried: 1813. Number failed: 25. Number tried: 1814. Number failed: 25. Number tried: 1815. Number failed: 25. Number tried: 1816. Number failed: 25. Number tried: 1817. Number failed: 25. Number tried: 1818. Number failed: 25. Number tried: 1819. Number failed: 25. Number tried: 1820. Number failed: 25. Number tried: 1821. Number failed: 25. Number tried: 1822. Number failed: 25. Number tried: 1823. Number failed: 25. Number tried: 1824. Number failed: 25. Number tried: 1825. Number failed: 25. Number tried: 1826. Number failed: 25. Number tried: 1827. Number failed: 25. Number tried: 1828. Number failed: 25. Number tried: 1829. Number failed: 25. Number tried: 1830. Number failed: 25. Number tried: 1831. Number failed: 25. Number tried: 1832. Number failed: 25. Number tried: 1833. Number failed: 25. Number tried: 1834. Number failed: 25. Number tried: 1835. Number failed: 25. Number tried: 1836. Number failed: 25. Number tried: 1837. Number failed: 25. Number tried: 1838. Number failed: 25. Number tried: 1839. Number failed: 25. Number tried: 1840. Number failed: 25. Number tried: 1841. Number failed: 25. Number tried: 1842. Number failed: 25. Number tried: 1843. Number failed: 25. Number tried: 1844. Number failed: 25. Number tried: 1845. Number failed: 25. 
Number tried: 1846. Number failed: 25. Number tried: 1847. Number failed: 25. Number tried: 1848. Number failed: 25. Number tried: 1849. Number failed: 25. Number tried: 1850. Number failed: 25. Number tried: 1851. Number failed: 25. Number tried: 1852. Number failed: 25. Number tried: 1853. Number failed: 25. Number tried: 1854. Number failed: 25. Number tried: 1855. Number failed: 25. Number tried: 1856. Number failed: 25. Number tried: 1857. Number failed: 25. Number tried: 1858. Number failed: 25. Number tried: 1859. Number failed: 25. Number tried: 1860. Number failed: 25. Number tried: 1861. Number failed: 25. Number tried: 1862. Number failed: 25. Number tried: 1863. Number failed: 25. Number tried: 1864. Number failed: 25. Number tried: 1865. Number failed: 25. Number tried: 1866. Number failed: 25. Number tried: 1867. Number failed: 25. Number tried: 1868. Number failed: 25. Number tried: 1869. Number failed: 25. Number tried: 1870. Number failed: 25. Number tried: 1871. Number failed: 25. Number tried: 1872. Number failed: 25. Number tried: 1873. Number failed: 25. Number tried: 1874. Number failed: 25. Number tried: 1875. Number failed: 25. Number tried: 1876. Number failed: 25. Number tried: 1877. Number failed: 25. Number tried: 1878. Number failed: 25. Number tried: 1879. Number failed: 25. Number tried: 1880. Number failed: 25. Number tried: 1881. Number failed: 25. Number tried: 1882. Number failed: 25. Number tried: 1883. Number failed: 25. Number tried: 1884. Number failed: 25. Number tried: 1885. Number failed: 25. Number tried: 1886. Number failed: 25. Number tried: 1887. Number failed: 25. Number tried: 1888. Number failed: 25. Number tried: 1889. Number failed: 25. Number tried: 1890. Number failed: 25. Number tried: 1891. Number failed: 25. Number tried: 1892. Number failed: 25. Number tried: 1893. Number failed: 25. Number tried: 1894. Number failed: 25. Number tried: 1895. Number failed: 25. Number tried: 1896. Number failed: 25. 
Number tried: 1897. Number failed: 25. Number tried: 1898. Number failed: 25. Number tried: 1899. Number failed: 25. Number tried: 1900. Number failed: 25. Number tried: 1901. Number failed: 25. Number tried: 1902. Number failed: 25. Number tried: 1903. Number failed: 25. Number tried: 1904. Number failed: 25. Number tried: 1905. Number failed: 25. Number tried: 1906. Number failed: 25. Number tried: 1907. Number failed: 25. Number tried: 1908. Number failed: 25. Number tried: 1909. Number failed: 25. Number tried: 1910. Number failed: 25. Number tried: 1911. Number failed: 25. Number tried: 1912. Number failed: 25. Number tried: 1913. Number failed: 25. Number tried: 1914. Number failed: 25. Number tried: 1915. Number failed: 25. Number tried: 1916. Number failed: 25. Number tried: 1917. Number failed: 25. Number tried: 1918. Number failed: 25. Number tried: 1919. Number failed: 25. Number tried: 1920. Number failed: 25. Number tried: 1921. Number failed: 25. Number tried: 1922. Number failed: 25. Number tried: 1923. Number failed: 25. Number tried: 1924. Number failed: 25. Number tried: 1925. Number failed: 25. Number tried: 1926. Number failed: 25. Number tried: 1927. Number failed: 25. Number tried: 1928. Number failed: 25. Number tried: 1929. Number failed: 25. Number tried: 1930. Number failed: 25. Number tried: 1931. Number failed: 25. Number tried: 1932. Number failed: 25. Number tried: 1933. Number failed: 25. Number tried: 1934. Number failed: 25. Number tried: 1935. Number failed: 25. Number tried: 1936. Number failed: 25. Number tried: 1937. Number failed: 25. Number tried: 1938. Number failed: 25. Number tried: 1939. Number failed: 25. Number tried: 1940. Number failed: 25. Number tried: 1941. Number failed: 25. Number tried: 1942. Number failed: 25. Number tried: 1943. Number failed: 25. Number tried: 1944. Number failed: 25. Number tried: 1945. Number failed: 25. Number tried: 1946. Number failed: 25. Number tried: 1947. Number failed: 25. 
Number tried: 1948. Number failed: 25. Number tried: 1949. Number failed: 25. Number tried: 1950. Number failed: 25. Number tried: 1951. Number failed: 25. Number tried: 1952. Number failed: 25. Number tried: 1953. Number failed: 25. Number tried: 1954. Number failed: 25. Number tried: 1955. Number failed: 25. Number tried: 1956. Number failed: 25. Number tried: 1957. Number failed: 25. Number tried: 1958. Number failed: 25. Number tried: 1959. Number failed: 25. Number tried: 1960. Number failed: 25. Number tried: 1961. Number failed: 25. Number tried: 1962. Number failed: 25. Number tried: 1963. Number failed: 25. Number tried: 1964. Number failed: 25. Number tried: 1965. Number failed: 25. Number tried: 1966. Number failed: 25. Number tried: 1967. Number failed: 25. Number tried: 1968. Number failed: 25. Number tried: 1969. Number failed: 25. Number tried: 1970. Number failed: 25. Number tried: 1971. Number failed: 25. Number tried: 1972. Number failed: 25. Number tried: 1973. Number failed: 25. Number tried: 1974. Number failed: 25. Number tried: 1975. Number failed: 25. Number tried: 1976. Number failed: 25. Number tried: 1977. Number failed: 25. Number tried: 1978. Number failed: 25. Number tried: 1979. Number failed: 25. Number tried: 1980. Number failed: 25. Number tried: 1981. Number failed: 25. Number tried: 1982. Number failed: 25. Number tried: 1983. Number failed: 25. Number tried: 1984. Number failed: 25. Number tried: 1985. Number failed: 25. Number tried: 1986. Number failed: 25. Number tried: 1987. Number failed: 25. Number tried: 1988. Number failed: 25. Number tried: 1989. Number failed: 25. Number tried: 1990. Number failed: 25. Number tried: 1991. Number failed: 25. Number tried: 1992. Number failed: 25. Number tried: 1993. Number failed: 25. Number tried: 1994. Number failed: 25. Number tried: 1995. Number failed: 25. Number tried: 1996. Number failed: 25. Number tried: 1997. Number failed: 25. Number tried: 1998. Number failed: 25. 
Number tried: 1999. Number failed: 25. Number tried: 2000. Number failed: 25. Number tried: 2001. Number failed: 25. Number tried: 2002. Number failed: 25. Number tried: 2003. Number failed: 25. Number tried: 2004. Number failed: 25. Number tried: 2005. Number failed: 25. Number tried: 2006. Number failed: 25. Number tried: 2007. Number failed: 25. Number tried: 2008. Number failed: 25. Number tried: 2009. Number failed: 25. Number tried: 2010. Number failed: 25. Number tried: 2011. Number failed: 25. Number tried: 2012. Number failed: 25. Number tried: 2013. Number failed: 25. Number tried: 2014. Number failed: 25. Number tried: 2015. Number failed: 25. Number tried: 2016. Number failed: 25. Number tried: 2017. Number failed: 25. Number tried: 2018. Number failed: 25. Number tried: 2019. Number failed: 25. Number tried: 2020. Number failed: 25. Number tried: 2021. Number failed: 25. Number tried: 2022. Number failed: 25. Number tried: 2023. Number failed: 25. Number tried: 2024. Number failed: 25. Number tried: 2025. Number failed: 25. Number tried: 2026. Number failed: 25. Number tried: 2027. Number failed: 25. Number tried: 2028. Number failed: 25. Number tried: 2029. Number failed: 25. Number tried: 2030. Number failed: 25. Number tried: 2031. Number failed: 25. Number tried: 2032. Number failed: 25. Number tried: 2033. Number failed: 25. Number tried: 2034. Number failed: 25. Number tried: 2035. Number failed: 25. Number tried: 2036. Number failed: 25. Number tried: 2037. Number failed: 25. Number tried: 2038. Number failed: 25. Number tried: 2039. Number failed: 25. Number tried: 2040. Number failed: 25. Number tried: 2041. Number failed: 25. Number tried: 2042. Number failed: 25. Number tried: 2043. Number failed: 25. Number tried: 2044. Number failed: 25. Number tried: 2045. Number failed: 25. Number tried: 2046. Number failed: 25. Number tried: 2047. Number failed: 25. Number tried: 2048. Number failed: 25. Number tried: 2049. Number failed: 25. 
Number tried: 2050. Number failed: 25. Number tried: 2051. Number failed: 25. Number tried: 2052. Number failed: 25. Number tried: 2053. Number failed: 25. Number tried: 2054. Number failed: 25. Number tried: 2055. Number failed: 25. Number tried: 2056. Number failed: 25. Number tried: 2057. Number failed: 25. Number tried: 2058. Number failed: 25. Number tried: 2059. Number failed: 25. Number tried: 2060. Number failed: 25. Number tried: 2061. Number failed: 25. Number tried: 2062. Number failed: 25. Number tried: 2063. Number failed: 25. Number tried: 2064. Number failed: 25. Number tried: 2065. Number failed: 25. Number tried: 2066. Number failed: 25. Number tried: 2067. Number failed: 25. Number tried: 2068. Number failed: 25. Number tried: 2069. Number failed: 25. Number tried: 2070. Number failed: 25. Number tried: 2071. Number failed: 25. Number tried: 2072. Number failed: 25. Number tried: 2073. Number failed: 25. Number tried: 2074. Number failed: 25. Number tried: 2075. Number failed: 25. Number tried: 2076. Number failed: 25. Number tried: 2077. Number failed: 25. Number tried: 2078. Number failed: 25. Number tried: 2079. Number failed: 25. Number tried: 2080. Number failed: 25. Number tried: 2081. Number failed: 25. Number tried: 2082. Number failed: 25. Number tried: 2083. Number failed: 25. Number tried: 2084. Number failed: 25. Number tried: 2085. Number failed: 25. Number tried: 2086. Number failed: 25. Number tried: 2087. Number failed: 25. Number tried: 2088. Number failed: 25. Number tried: 2089. Number failed: 25. Number tried: 2090. Number failed: 25. Number tried: 2091. Number failed: 25. Number tried: 2092. Number failed: 25. Number tried: 2093. Number failed: 25. Number tried: 2094. Number failed: 25. Number tried: 2095. Number failed: 25. Number tried: 2096. Number failed: 25. Number tried: 2097. Number failed: 25. Number tried: 2098. Number failed: 25. Number tried: 2099. Number failed: 25. Number tried: 2100. Number failed: 25. 
Number tried: 2101. Number failed: 25. Number tried: 2102. Number failed: 25. Number tried: 2103. Number failed: 25. Number tried: 2104. Number failed: 25. Number tried: 2105. Number failed: 25. Number tried: 2106. Number failed: 25. Number tried: 2107. Number failed: 25. Number tried: 2108. Number failed: 25. Number tried: 2109. Number failed: 25. Number tried: 2110. Number failed: 25. Number tried: 2111. Number failed: 25. Number tried: 2112. Number failed: 25. Number tried: 2113. Number failed: 25. Number tried: 2114. Number failed: 25. Number tried: 2115. Number failed: 25. Number tried: 2116. Number failed: 25. Number tried: 2117. Number failed: 25. Number tried: 2118. Number failed: 25. Number tried: 2119. Number failed: 25. Number tried: 2120. Number failed: 25. Number tried: 2121. Number failed: 25. Number tried: 2122. Number failed: 25. Number tried: 2123. Number failed: 25. Number tried: 2124. Number failed: 25. Number tried: 2125. Number failed: 25. Number tried: 2126. Number failed: 25. Number tried: 2127. Number failed: 25. Number tried: 2128. Number failed: 25. Number tried: 2129. Number failed: 25. Number tried: 2130. Number failed: 25. Number tried: 2131. Number failed: 25. Number tried: 2132. Number failed: 25. Number tried: 2133. Number failed: 25. Number tried: 2134. Number failed: 25. Number tried: 2135. Number failed: 25. Number tried: 2136. Number failed: 25. Number tried: 2137. Number failed: 25. Number tried: 2138. Number failed: 25. Number tried: 2139. Number failed: 25. Number tried: 2140. Number failed: 25. Number tried: 2141. Number failed: 25. Number tried: 2142. Number failed: 25. Number tried: 2143. Number failed: 25. Number tried: 2144. Number failed: 25. Number tried: 2145. Number failed: 25. Number tried: 2146. Number failed: 25. Number tried: 2147. Number failed: 25. Number tried: 2148. Number failed: 25. Number tried: 2149. Number failed: 25. Number tried: 2150. Number failed: 25. Number tried: 2151. Number failed: 25. 
Number tried: 2152. Number failed: 25. Number tried: 2153. Number failed: 25. Number tried: 2154. Number failed: 25. Number tried: 2155. Number failed: 25. Number tried: 2156. Number failed: 25. Number tried: 2157. Number failed: 25. Number tried: 2158. Number failed: 25. Number tried: 2159. Number failed: 25. Number tried: 2160. Number failed: 25. Number tried: 2161. Number failed: 25. Number tried: 2162. Number failed: 25. Number tried: 2163. Number failed: 25. Number tried: 2164. Number failed: 25. Number tried: 2165. Number failed: 25. Number tried: 2166. Number failed: 25. Number tried: 2167. Number failed: 25. Number tried: 2168. Number failed: 25. Number tried: 2169. Number failed: 25. Number tried: 2170. Number failed: 25. Number tried: 2171. Number failed: 25. Number tried: 2172. Number failed: 25. Number tried: 2173. Number failed: 25. Number tried: 2174. Number failed: 25. Number tried: 2175. Number failed: 25. Number tried: 2176. Number failed: 25. Number tried: 2177. Number failed: 25. Number tried: 2178. Number failed: 25. Number tried: 2179. Number failed: 25. Number tried: 2180. Number failed: 25. Number tried: 2181. Number failed: 25. Number tried: 2182. Number failed: 25. Number tried: 2183. Number failed: 25. Number tried: 2184. Number failed: 25. Number tried: 2185. Number failed: 25. Number tried: 2186. Number failed: 25. Number tried: 2187. Number failed: 25. Number tried: 2188. Number failed: 25. Number tried: 2189. Number failed: 25. Number tried: 2190. Number failed: 25. Number tried: 2191. Number failed: 25. Number tried: 2192. Number failed: 25. Number tried: 2193. Number failed: 25. Number tried: 2194. Number failed: 25. Number tried: 2195. Number failed: 25. Number tried: 2196. Number failed: 25. Number tried: 2197. Number failed: 25. Number tried: 2198. Number failed: 25. Number tried: 2199. Number failed: 25. Number tried: 2200. Number failed: 25. Number tried: 2201. Number failed: 25. Number tried: 2202. Number failed: 25. 
Number tried: 2203. Number failed: 25. Number tried: 2204. Number failed: 25. Number tried: 2205. Number failed: 25. Number tried: 2206. Number failed: 25. Number tried: 2207. Number failed: 25. Number tried: 2208. Number failed: 25. Number tried: 2209. Number failed: 25. Number tried: 2210. Number failed: 25. Number tried: 2211. Number failed: 25. Number tried: 2212. Number failed: 25. Number tried: 2213. Number failed: 25. Number tried: 2214. Number failed: 25. Number tried: 2215. Number failed: 25. Number tried: 2216. Number failed: 25. Number tried: 2217. Number failed: 25. Number tried: 2218. Number failed: 25. Number tried: 2219. Number failed: 25. Number tried: 2220. Number failed: 25. Number tried: 2221. Number failed: 25. Number tried: 2222. Number failed: 25. Number tried: 2223. Number failed: 25. Number tried: 2224. Number failed: 25. Number tried: 2225. Number failed: 25. Number tried: 2226. Number failed: 25. Number tried: 2227. Number failed: 25. Number tried: 2228. Number failed: 25. Number tried: 2229. Number failed: 25. Number tried: 2230. Number failed: 25. Number tried: 2231. Number failed: 25. Number tried: 2232. Number failed: 25. Number tried: 2233. Number failed: 25. Number tried: 2234. Number failed: 25. Number tried: 2235. Number failed: 25. Number tried: 2236. Number failed: 25. Number tried: 2237. Number failed: 25. Number tried: 2238. Number failed: 25. Number tried: 2239. Number failed: 25. Number tried: 2240. Number failed: 25. Number tried: 2241. Number failed: 25. Number tried: 2242. Number failed: 25. Number tried: 2243. Number failed: 25. Number tried: 2244. Number failed: 25. Number tried: 2245. Number failed: 25. Number tried: 2246. Number failed: 25. Number tried: 2247. Number failed: 25. Number tried: 2248. Number failed: 25. Number tried: 2249. Number failed: 25. Number tried: 2250. Number failed: 25. Number tried: 2251. Number failed: 25. Number tried: 2252. Number failed: 25. Number tried: 2253. Number failed: 25. 
Number tried: 2254. Number failed: 25. Number tried: 2255. Number failed: 25. Number tried: 2256. Number failed: 25. Number tried: 2257. Number failed: 25. Number tried: 2258. Number failed: 25. Number tried: 2259. Number failed: 25. Number tried: 2260. Number failed: 25. Number tried: 2261. Number failed: 25. Number tried: 2262. Number failed: 25. Number tried: 2263. Number failed: 25. Number tried: 2264. Number failed: 25. Number tried: 2265. Number failed: 25. Number tried: 2266. Number failed: 25. Number tried: 2267. Number failed: 25. Number tried: 2268. Number failed: 25. Number tried: 2269. Number failed: 25. Number tried: 2270. Number failed: 25. Number tried: 2271. Number failed: 25. Number tried: 2272. Number failed: 25. Number tried: 2273. Number failed: 25. Number tried: 2274. Number failed: 25. Number tried: 2275. Number failed: 25. Number tried: 2276. Number failed: 25. Number tried: 2277. Number failed: 25. Number tried: 2278. Number failed: 25. Number tried: 2279. Number failed: 25. Number tried: 2280. Number failed: 25. Number tried: 2281. Number failed: 25. Number tried: 2282. Number failed: 25. Number tried: 2283. Number failed: 25. Number tried: 2284. Number failed: 25. Number tried: 2285. Number failed: 25. Number tried: 2286. Number failed: 25. Number tried: 2287. Number failed: 25. Number tried: 2288. Number failed: 25. Number tried: 2289. Number failed: 25. Number tried: 2290. Number failed: 25. Number tried: 2291. Number failed: 25. Number tried: 2292. Number failed: 25. Number tried: 2293. Number failed: 25. Number tried: 2294. Number failed: 25. Number tried: 2295. Number failed: 25. Number tried: 2296. Number failed: 25. Number tried: 2297. Number failed: 25. Number tried: 2298. Number failed: 25. Number tried: 2299. Number failed: 25. Number tried: 2300. Number failed: 25. Number tried: 2301. Number failed: 25. Number tried: 2302. Number failed: 25. Number tried: 2303. Number failed: 25. Number tried: 2304. Number failed: 25. 
Number tried: 2305. Number failed: 25. Number tried: 2306. Number failed: 25. Number tried: 2307. Number failed: 25. Number tried: 2308. Number failed: 25. Number tried: 2309. Number failed: 25. Number tried: 2310. Number failed: 25. Number tried: 2311. Number failed: 25. Number tried: 2312. Number failed: 25. Number tried: 2313. Number failed: 25. Number tried: 2314. Number failed: 25. Number tried: 2315. Number failed: 25. Number tried: 2316. Number failed: 25. Number tried: 2317. Number failed: 25. Number tried: 2318. Number failed: 25. Number tried: 2319. Number failed: 25. Number tried: 2320. Number failed: 25. Number tried: 2321. Number failed: 25. Number tried: 2322. Number failed: 25. Number tried: 2323. Number failed: 25. Number tried: 2324. Number failed: 25. Number tried: 2325. Number failed: 25. Number tried: 2326. Number failed: 25. Number tried: 2327. Number failed: 25. Number tried: 2328. Number failed: 25. Number tried: 2329. Number failed: 25. Number tried: 2330. Number failed: 25. Number tried: 2331. Number failed: 25. Number tried: 2332. Number failed: 25. Number tried: 2333. Number failed: 25. Number tried: 2334. Number failed: 25. Number tried: 2335. Number failed: 25. Number tried: 2336. Number failed: 25. Number tried: 2337. Number failed: 25. Number tried: 2338. Number failed: 25. Number tried: 2339. Number failed: 25. Number tried: 2340. Number failed: 25. Number tried: 2341. Number failed: 25. Number tried: 2342. Number failed: 25. Number tried: 2343. Number failed: 25. Number tried: 2344. Number failed: 25. Number tried: 2345. Number failed: 25. Number tried: 2346. Number failed: 25. Number tried: 2347. Number failed: 25. Number tried: 2348. Number failed: 25. Number tried: 2349. Number failed: 25. Number tried: 2350. Number failed: 25. Number tried: 2351. Number failed: 25. Number tried: 2352. Number failed: 25. Number tried: 2353. Number failed: 25. Number tried: 2354. Number failed: 25. Number tried: 2355. Number failed: 25. 
Number tried: 2356. Number failed: 25.
#%% To check on rate limit status
# rate_status = api.rate_limit_status()
# rate_status['resources']['statuses']['/statuses/show/:id']
#%% Load API data - read JSON strings from text file and create DataFrame
# tweet_json.txt holds one JSON-encoded tweet per line (written by the download
# cell above). Only the fields used in the analysis are kept.
data_list = []
# json.dumps escapes non-ASCII characters by default, so the file is plain
# ASCII and reading it as UTF-8 is safe regardless of the platform default.
with open('tweet_json.txt', 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a stray trailing newline) instead of
            # letting json.loads raise on an empty string.
            continue
        tweet_dict = json.loads(line)
        data_list.append(
            [tweet_dict['id'],
             tweet_dict['favorite_count'],
             tweet_dict['retweet_count'],
             tweet_dict['user']['id'],
             tweet_dict['user']['name'],
             tweet_dict['user']['url']
             ]
        )
# One row per successfully downloaded tweet, columns in the order appended above.
df_api = pd.DataFrame(data_list,
                      columns=['tweet_id', 'favorite_count', 'retweet_count', 'user_id', 'user_name', 'user_url'])
# Preview the first rows of the Twitter archive data.
df_archive.head()
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 892420643555336193 | NaN | NaN | 2017-08-01 16:23:56 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Phineas. He's a mystical boy. Only eve... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892420643... | 13 | 10 | Phineas | None | None | None | None |
| 1 | 892177421306343426 | NaN | NaN | 2017-08-01 00:17:27 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tilly. She's just checking pup on you.... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892177421... | 13 | 10 | Tilly | None | None | None | None |
| 2 | 891815181378084864 | NaN | NaN | 2017-07-31 00:18:03 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Archie. He is a rare Norwegian Pouncin... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891815181... | 12 | 10 | Archie | None | None | None | None |
| 3 | 891689557279858688 | NaN | NaN | 2017-07-30 15:58:51 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Darla. She commenced a snooze mid meal... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891689557... | 13 | 10 | Darla | None | None | None | None |
| 4 | 891327558926688256 | NaN | NaN | 2017-07-29 16:00:24 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Franklin. He would like you to stop ca... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891327558... | 12 | 10 | Franklin | None | None | None | None |
# Summarize the archive columns: non-null counts and dtypes.
df_archive.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2356 entries, 0 to 2355 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2356 non-null int64 1 in_reply_to_status_id 78 non-null float64 2 in_reply_to_user_id 78 non-null float64 3 timestamp 2356 non-null object 4 source 2356 non-null object 5 text 2356 non-null object 6 retweeted_status_id 181 non-null float64 7 retweeted_status_user_id 181 non-null float64 8 retweeted_status_timestamp 181 non-null object 9 expanded_urls 2297 non-null object 10 rating_numerator 2356 non-null int64 11 rating_denominator 2356 non-null int64 12 name 2356 non-null object 13 doggo 2356 non-null object 14 floofer 2356 non-null object 15 pupper 2356 non-null object 16 puppo 2356 non-null object dtypes: float64(4), int64(3), object(10) memory usage: 313.0+ KB
doggo, floofer, pupper, and puppo should be categorical, not object.
df_api.head()
| tweet_id | favorite_count | retweet_count | user_id | user_name | user_url | |
|---|---|---|---|---|---|---|
| 0 | 892420643555336193 | 35001 | 7349 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk |
| 1 | 892177421306343426 | 30305 | 5480 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk |
| 2 | 891815181378084864 | 22794 | 3624 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk |
| 3 | 891689557279858688 | 38267 | 7532 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk |
| 4 | 891327558926688256 | 36547 | 8119 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk |
# Summarize the API-derived columns: non-null counts and dtypes.
df_api.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2331 entries, 0 to 2330 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2331 non-null int64 1 favorite_count 2331 non-null int64 2 retweet_count 2331 non-null int64 3 user_id 2331 non-null int64 4 user_name 2331 non-null object 5 user_url 2331 non-null object dtypes: int64(4), object(2) memory usage: 109.4+ KB
# Tally the tweets per user name to see which accounts appear in the API data.
df_api.user_name.value_counts()
WeRateDogs® 2331 Name: user_name, dtype: int64
# Summarize the image-prediction columns: non-null counts and dtypes.
df_predictions.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2075 entries, 0 to 2074 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2075 non-null int64 1 jpg_url 2075 non-null object 2 img_num 2075 non-null int64 3 p1 2075 non-null object 4 p1_conf 2075 non-null float64 5 p1_dog 2075 non-null bool 6 p2 2075 non-null object 7 p2_conf 2075 non-null float64 8 p2_dog 2075 non-null bool 9 p3 2075 non-null object 10 p3_conf 2075 non-null float64 11 p3_dog 2075 non-null bool dtypes: bool(3), float64(3), int64(2), object(4) memory usage: 152.1+ KB
# Count the tweet ids that repeat an earlier row of the predictions table.
df_predictions['tweet_id'].duplicated().sum()
0
# Which rows have a top-ranked (p1) prediction that is not a dog? This subset is
# also the starting point for finding rows where no prediction is a dog.
df_p1_nondog = df_predictions.loc[df_predictions['p1_dog'].eq(False)]
df_p1_nondog.head()
| tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | 666051853826850816 | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | box_turtle | 0.933012 | False | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False |
| 8 | 666057090499244032 | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | shopping_cart | 0.962465 | False | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True |
| 17 | 666104133288665088 | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | hen | 0.965932 | False | cock | 0.033919 | False | partridge | 0.000052 | False |
| 18 | 666268910803644416 | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | desktop_computer | 0.086502 | False | desk | 0.085547 | False | bookcase | 0.079480 | False |
| 21 | 666293911632134144 | https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg | 1 | three-toed_sloth | 0.914671 | False | otter | 0.015250 | False | great_grey_owl | 0.013207 | False |
# Number of rows whose primary (p1) prediction is not a dog.
df_p1_nondog.shape[0]
543
# Among the p1 non-dog rows, keep those where neither p2 nor p3 is a dog either,
# i.e. all three predictions are of non-dogs, and count them.
df_pall_nondog = df_p1_nondog[~(df_p1_nondog.p2_dog | df_p1_nondog.p3_dog)]
df_pall_nondog.shape[0]
324
# Spot-check the joint non-dog predictions: list the first few image URLs
# so the pictures can be inspected by hand.
df_pall_nondog['jpg_url'].head()
6 https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg 17 https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg 18 https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg 21 https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg 25 https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg Name: jpg_url, dtype: object
Among the five cases above in which all three predictions were non-dog, the joint predictions were wrong in only one case: index number 18. That picture shows the back of a dog whose front is visible only on a computer screen.
There are a lot of tweets of animals that are not dogs. The predictions in df_predictions can be used to help find them, albeit with some degree of error.
# Show every archive row whose tweet_id occurs more than once (keep=False flags all copies).
df_archive.loc[df_archive.tweet_id.duplicated(keep=False)]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo |
|---|
# Count archive rows with a missing tweet_id.
df_archive['tweet_id'].isna().sum()
0
# Show the archive rows with a non-null in_reply_to_status_id (i.e. tweets recorded as replies).
df_archive[df_archive.in_reply_to_status_id.notnull()]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 30 | 886267009285017600 | 8.862664e+17 | 2.281182e+09 | 2017-07-15 16:51:35 +0000 | <a href="http://twitter.com/download/iphone" r... | @NonWhiteHat @MayhewMayhem omg hello tanner yo... | NaN | NaN | NaN | NaN | 12 | 10 | None | None | None | None | None |
| 55 | 881633300179243008 | 8.816070e+17 | 4.738443e+07 | 2017-07-02 21:58:53 +0000 | <a href="http://twitter.com/download/iphone" r... | @roushfenway These are good dogs but 17/10 is ... | NaN | NaN | NaN | NaN | 17 | 10 | None | None | None | None | None |
| 64 | 879674319642796034 | 8.795538e+17 | 3.105441e+09 | 2017-06-27 12:14:36 +0000 | <a href="http://twitter.com/download/iphone" r... | @RealKentMurphy 14/10 confirmed | NaN | NaN | NaN | NaN | 14 | 10 | None | None | None | None | None |
| 113 | 870726314365509632 | 8.707262e+17 | 1.648776e+07 | 2017-06-02 19:38:25 +0000 | <a href="http://twitter.com/download/iphone" r... | @ComplicitOwl @ShopWeRateDogs >10/10 is res... | NaN | NaN | NaN | NaN | 10 | 10 | None | None | None | None | None |
| 148 | 863427515083354112 | 8.634256e+17 | 7.759620e+07 | 2017-05-13 16:15:35 +0000 | <a href="http://twitter.com/download/iphone" r... | @Jack_Septic_Eye I'd need a few more pics to p... | NaN | NaN | NaN | NaN | 12 | 10 | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2038 | 671550332464455680 | 6.715449e+17 | 4.196984e+09 | 2015-12-01 04:44:10 +0000 | <a href="http://twitter.com/download/iphone" r... | After 22 minutes of careful deliberation this ... | NaN | NaN | NaN | NaN | 1 | 10 | None | None | None | None | None |
| 2149 | 669684865554620416 | 6.693544e+17 | 4.196984e+09 | 2015-11-26 01:11:28 +0000 | <a href="http://twitter.com/download/iphone" r... | After countless hours of research and hundreds... | NaN | NaN | NaN | NaN | 11 | 10 | None | None | None | None | None |
| 2169 | 669353438988365824 | 6.678065e+17 | 4.196984e+09 | 2015-11-25 03:14:30 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Tessa. She is also very pleased after ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/669353438... | 10 | 10 | Tessa | None | None | None | None |
| 2189 | 668967877119254528 | 6.689207e+17 | 2.143566e+07 | 2015-11-24 01:42:25 +0000 | <a href="http://twitter.com/download/iphone" r... | 12/10 good shit Bubka\n@wane15 | NaN | NaN | NaN | NaN | 12 | 10 | None | None | None | None | None |
| 2298 | 667070482143944705 | 6.670655e+17 | 4.196984e+09 | 2015-11-18 20:02:51 +0000 | <a href="http://twitter.com/download/iphone" r... | After much debate this dog is being upgraded t... | NaN | NaN | NaN | NaN | 10 | 10 | None | None | None | None | None |
78 rows × 17 columns
# Print the full text of every tweet with a non-null in_reply_to_status_id,
# numbered from 0, since the table display truncates long text.
for i, text in enumerate(df_archive[df_archive.in_reply_to_status_id.notnull()].text):
    print(i, text)
0 @NonWhiteHat @MayhewMayhem omg hello tanner you are a scary good boy 12/10 would pet with extreme caution 1 @roushfenway These are good dogs but 17/10 is an emotional impulse rating. More like 13/10s 2 @RealKentMurphy 14/10 confirmed 3 @ComplicitOwl @ShopWeRateDogs >10/10 is reserved for dogs 4 @Jack_Septic_Eye I'd need a few more pics to polish a full analysis, but based on the good boy content above I'm leaning towards 12/10 5 Ladies and gentlemen... I found Pipsy. He may have changed his name to Pablo, but he never changed his love for the sea. Pupgraded to 14/10 https://t.co/lVU5GyNFen 6 @Marc_IRL pixelated af 12/10 7 THIS IS CHARLIE, MARK. HE DID JUST WANT TO SAY HI AFTER ALL. PUPGRADED TO A 14/10. WOULD BE AN HONOR TO FLY WITH https://t.co/p1hBHCmWnA 8 @xianmcguire @Jenna_Marbles Kardashians wouldn't be famous if as a society we didn't place enormous value on what they do. The dogs are very deserving of their 14/10 9 @dhmontgomery We also gave snoop dogg a 420/10 but I think that predated your research 10 @s8n You tried very hard to portray this good boy as not so good, but you have ultimately failed. His goodness shines through. 666/10 11 @markhoppus MARK THAT DOG HAS SEEN AND EXPERIENCED MANY THINGS. PROBABLY LOST OTHER EAR DOING SOMETHING HEROIC. 13/10 HUG THE DOG HOPPUS 12 Jerry just apuppologized to me. He said there was no ill-intent to the slippage. I overreacted I admit. Pupgraded to an 11/10 would pet 13 .@breaannanicolee PUPDATE: Cannon has a heart on his nose. Pupgraded to a 13/10 14 PUPDATE: I'm proud to announce that Toby is 236 days sober. Pupgraded to a 13/10. 
We're all very proud of you, Toby https://t.co/a5OaJeRl9B 15 @0_kelvin_0 >10/10 is reserved for puppos sorry Kevin 16 @markhoppus 182/10 17 @bragg6of8 @Andy_Pace_ we are still looking for the first 15/10 18 @jonnysun @Lin_Manuel ok jomny I know you're excited but 960/00 isn't a valid rating, 13/10 is tho 19 @docmisterio account started on 11/15/15 20 @UNC can confirm 12/10 21 I was going to do 007/10, but the joke wasn't worth the <10 rating 22 @HistoryInPics 13/10 23 @imgur for a polar bear tho I'd say 13/10 is appropriate 24 I've been informed by multiple sources that this is actually a dog elf who's tired from helping Santa all night. Pupgraded to 12/10 25 PUPDATE: I've been informed that Augie was actually bringing his family these flowers when he tripped. Very good boy. Pupgraded to 11/10 26 Like doggo, like pupper version 2. Both 11/10 https://t.co/9IxWAXFqze 27 .@NBCSports OMG THE TINY HAT I'M GOING TO HAVE TO SAY 11/10 NBC 28 @SkyWilliams doggo simply protecting you from evil that which you cannot see. 11/10 would give extra pets 29 @JODYHiGHROLLER it may be an 11/10 but what do I know 😉 30 13/10 for breakdancing puppo @shibbnbot 31 His name is Charley and he already has a new set of wheels thanks to donations. I heard his top speed was also increased. 13/10 for Charley 32 @TheEllenShow I'm not sure if you know this but that doggo right there is a 12/10 33 13/10 such a good doggo @spaghemily 34 Other pupper asked not to have his identity shared. Probably just embarrassed about the headbutt. Also 12/10 it'll be ok mystery pup 35 PUPDATE: can't see any. Even if I could, I couldn't reach them to pet. 0/10 much disappointment https://t.co/c7WXaB2nqX 36 Guys... Dog Jesus 2.0 13/10 buoyant af https://t.co/CuNA7OwfKQ 37 @mount_alex3 13/10 38 "Challenge completed" (pupgraded to 12/10) https://t.co/85dTK7XCXB 39 @serial @MrRoles OH MY GOD I listened to all of season 1 during a single road trip. I love you guys! 
I can confirm Bernie's 12/10 rating :) 40 Seriously, add us 🐶 11/10 for sad wet pupper https://t.co/xwPE9faVZR 41 I found a forest Pipsy. 12/10 https://t.co/mIQ1KoVsmU 42 13/10 hero af @ABC 43 Really guys? Again? I know this is a rare Albanian Bingo Seal, but we only rate dogs. Only send in dogs... 9/10 https://t.co/6JYLpUmBrC 44 After reading the comments I may have overestimated this pup. Downgraded to a 1/10. Please forgive me 45 If you are aware of who is making these please let me know. 13/10 vroom vroom https://t.co/U0D1sbIDrG 46 This pupper only appears through the hole of a Funyun. Much like Phineas, this one is also mysterious af. 10/10 https://t.co/SQsEBWxPyG 47 BREAKING PUPDATE: I've just been notified that (if in U.S.) this dog appears to be operating the vehicle. Upgraded to 10/10. Skilled af 48 Personally I'd give him an 11/10. Not sure why you think you're qualified to rate such a stellar pup. @CommonWhiteGirI 49 PUPDATE: just noticed this dog has some extra legs. Very advanced. Revolutionary af. Upgraded to a 9/10 50 These are some pictures of Teddy that further justify his 13/10 rating. Please enjoy https://t.co/tDkJAnQsbQ 51 12/10 @LightningHoltt 52 Yes I do realize a rating of 4/20 would've been fitting. However, it would be unjust to give these cooperative pups that low of a rating 53 Jack deserves another round of applause. If you missed this earlier today I strongly suggest reading it. Wonderful first 14/10 🐶❤️ 54 For those who claim this is a goat, u are wrong. It is not the Greatest Of All Time. The rating of 5/10 should have made that clear. Thank u 55 After watching this video, we've determined that Pippa will be upgraded to a 12/10. Please enjoy https://t.co/IKoRK4yoxV 56 Two sneaky puppers were not initially seen, moving the rating to 143/130. Please forgive us. Thank you https://t.co/kRK51Y5ac3 57 I'm aware that I could've said 20/16, but here at WeRateDogs we are very professional. 
An inconsistent rating scale is simply irresponsible 58 I've been told there's a slight possibility he's checking his mirror. We'll bump to 9.5/10. Still a menace 59 After getting lost in Reese's eyes for several minutes we're going to upgrade him to a 13/10 60 After some outrage from the crowd. Bubbles is being upgraded to a 7/10. That's as high as I'm going. Thank you 61 & this is Yoshi. Another world record contender 11/10 (what the hell is happening why are there so many contenders?) https://t.co/QG708dDNH6 62 This dog is being demoted to a 9/10 for not wearing a helmet while riding. Gotta stay safe out there. Thank you 63 We've got ourselves a battle here. Watch out Reggie. 11/10 https://t.co/ALJvbtcwf0 64 Yea I lied. Here's more. All 13/10 https://t.co/ZQZf2U4xCP 65 Ok last one of these. I may try to make some myself. Anyway here ya go. 13/10 https://t.co/i9CDd1oEu8 66 I have found another. 13/10 https://t.co/HwroPYv8pY 67 Just received another perfect photo of dogs and the sunset. 12/10 https://t.co/9YmNcxA2Cc 68 Some clarification is required. The dog is singing Cher and that is more than worthy of an 11/10. Thank you 69 The 13/10 also takes into account this impeccable yard. Louis is great but the future dad in me can't ignore that luscious green grass 70 13/10 @ABC7 71 The millennials have spoken and we've decided to immediately demote to a 1/10. Thank you 72 I'm just going to leave this one here as well. 13/10 https://t.co/DaD5SyajWt 73 After 22 minutes of careful deliberation this dog is being demoted to a 1/10. The longer you look at him the more terrifying he becomes 74 After countless hours of research and hundreds of formula alterations we have concluded that Dug should be bumped to an 11/10 75 This is Tessa. She is also very pleased after finally meeting her biological father. 10/10 https://t.co/qDS1aCqppv 76 12/10 good shit Bubka @wane15 77 After much debate this dog is being upgraded to 10/10. I repeat 10/10
# For rows that have both an in_reply_to_status_id and expanded_urls, show the
# user id being replied to, cast to int64 so the id is displayed in full.
df_archive.loc[
    df_archive.in_reply_to_status_id.notnull() & df_archive.expanded_urls.notnull(),
    'in_reply_to_user_id',
].astype('int64')
149 4196983835 184 4196983835 251 4196983835 565 4196983835 1016 4196983835 1018 4196983835 1127 4196983835 1330 4196983835 1339 4196983835 1356 4196983835 1452 4196983835 1464 4196983835 1501 4196983835 1630 4196983835 1634 4196983835 1842 4196983835 1852 4196983835 1866 4196983835 1882 4196983835 1885 4196983835 1892 4196983835 2036 4196983835 2169 4196983835 Name: in_reply_to_user_id, dtype: int64
# Number of rows that have both an in_reply_to_status_id and expanded_urls.
df_archive.loc[
    df_archive.in_reply_to_status_id.notnull() & df_archive.expanded_urls.notnull()
].shape[0]
23
# Tally the distinct values of the source column (each is an HTML anchor string).
df_archive.source.value_counts()
<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a> 2221 <a href="http://vine.co" rel="nofollow">Vine - Make a Scene</a> 91 <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a> 33 <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">TweetDeck</a> 11 Name: source, dtype: int64
The `source` column should be split into two variables, source_name and source_weblink, both extracted from the HTML anchor tag. source_name should be categorical and source_weblink should be a string (object).
df_archive.retweeted_status_id.notnull().sum()
181
# Collect the non-null retweeted_status_id values as integers, then as a plain list.
# NOTE(review): the column displays as float (e.g. 8.782815e+17), so ids may already
# be rounded before this cast — confirm against the raw CSV.
df_retweet = df_archive.retweeted_status_id.dropna().astype('Int64')
retweet_list = df_retweet.tolist()
retweet_list
[887473957103951872, 886053734421102592, 830583320585068544, 878057613040115712, 878281511006478336, 669000397445533696, 876850772322988032, 866334964761202688, 868880397819494400, 873213775632977920, 872657584259551232, 841077006473256960, 859196978902773760, 868552278524837888, 865013420445368320, 866450705531457536, 837820167694528512, 863062471531167744, 806629075125202944, 860563773140209664, 860914485250469888, 761672994376806400, 839549326359670784, 857061112319234048, 844704788403113984, 856330158768218112, 842163532590374912, 855122533267460096, 831650051525054464, 829374341691346944, 848289382176100352, 833124694597443584, 832369877331693568, 847971000004354048, 756288534030475264, 773308824254029824, 807106840509214720, 817423860136083456, 840632337062862848, 667152164079423488, 839289919298224128, 838905980628819968, 783839966405230592, 820749716845686784, 837011344666812416, 836648149003485184, 817827839487737856, 786963064373534720, 835264098648616960, 753039830821511168, 829501995190984704, 832434358292209664, 832766382198566912, 786709082849828864, 793286476301799424, 769940425801170944, 809220051211603968, 826958653328592896, 810254108431155200, 694669722378485760, 809920764300447744, 825026590719483904, 795076730285391872, 784057939640352768, 822244816520155136, 822489057087389696, 786233965241827328, 780601303617732608, 782305867769217024, 819227688460238848, 820314633777061888, 800141422401830912, 819952236453363712, 819004803107983360, 819006400881917952, 808344865868283904, 773547596996571136, 816450570814898176, 692417313023332352, 815966073409433600, 790946055508652032, 815990720817401856, 732005617171337216, 791406955684368384, 698195409219559424, 790277117346975744, 680055455951884288, 793962221541933056, 801167903437357056, 782969140009107456, 781524693396357120, 783334639985389568, 782722598790725632, 784183165795655680, 784826020293709824, 691416866452082688, 767754930266464256, 777684233540206592, 779056095788752896, 
800065028116385792, 775733305207554048, 800854022970286080, 776113305656188928, 681694085539872768, 775085132600442880, 774314403806253056, 740676976021798912, 718631497683582976, 712809025985978368, 701214700881756160, 683391852557561856, 676936541936185344, 675501075957489664, 671896809300709376, 670444955656130560, 667509364010450944, 667182792070062080, 666104133288665088, 771770456517009408, 687317306314240000, 780931614150983680, 796149749086875648, 789530877013393408, 788765914992902144, 791672322847637504, 684830982659280896, 682088079302213632, 746757706116112384, 763837565564780544, 789986466051088384, 762699858130116608, 762464539388485632, 750719632563142656, 736392552031657984, 761004547850530816, 752932432744185856, 759447681597108224, 779834332596887552, 674291837063053312, 707610948723478528, 742423170473463808, 780465709297995776, 753375668877008896, 679462823135686656, 758099635764359168, 703041949650034688, 768193404517830656, 750429297815552000, 679828447187857408, 700747788515020800, 733109485275860992, 740373189193256960, 771380798096281600, 765222098633691136, 673320132811366400, 771002130450743296, 739238157791694848, 741067306818797568, 706904523814649856, 700143752053182464, 739979191639244800, 759923798737051648, 725842289046749184, 673295268553605120, 685325112850124800, 711694788429553664, 760153833259601920, 739544079319588864, 670319130621435904, 679062614270468096, 757597141099548672, 679158373988876288, 681523177663676416, 683515932363329536, 675354435921575936, 704761120771465216, 667866724293877760, 667138269671505920, 711998279773347840, 667548695664070656, 667548415174144000]
# Sanity check: the list length should equal the count of non-null retweeted_status_id values.
len(retweet_list)
181
# Which archive tweets have a tweet_id that appears in retweet_list,
# i.e. were themselves retweeted within the archive?
df_archive[df_archive.tweet_id.isin(retweet_list)]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 75 | 878281511006478336 | NaN | NaN | 2017-06-23 16:00:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Shadow. In an attempt to reach maximum zo... | NaN | NaN | NaN | https://www.gofundme.com/3yd6y1c,https://twitt... | 13 | 10 | Shadow | None | None | None | None |
| 76 | 878057613040115712 | NaN | NaN | 2017-06-23 01:10:23 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Emmy. She was adopted today. Massive r... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/878057613... | 14 | 10 | Emmy | None | None | None | None |
| 98 | 873213775632977920 | NaN | NaN | 2017-06-09 16:22:42 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Sierra. She's one precious pupper. Abs... | NaN | NaN | NaN | https://www.gofundme.com/help-my-baby-sierra-g... | 12 | 10 | Sierra | None | None | pupper | None |
| 126 | 868552278524837888 | NaN | NaN | 2017-05-27 19:39:34 +0000 | <a href="http://twitter.com/download/iphone" r... | Say hello to Cooper. His expression is the sam... | NaN | NaN | NaN | https://www.gofundme.com/3ti3nps,https://twitt... | 12 | 10 | Cooper | None | None | None | None |
| 150 | 863062471531167744 | NaN | NaN | 2017-05-12 16:05:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Say hello to Quinn. She's quite the goofball. ... | NaN | NaN | NaN | https://www.gofundme.com/helpquinny,https://tw... | 13 | 10 | Quinn | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2180 | 669000397445533696 | NaN | NaN | 2015-11-24 03:51:38 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Terrance. He's being yelled at because he... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/669000397... | 11 | 10 | Terrance | None | None | None | None |
| 2248 | 667866724293877760 | NaN | NaN | 2015-11-21 00:46:50 +0000 | <a href="http://twitter.com/download/iphone" r... | This is Shaggy. He knows exactly how to solve ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/667866724... | 10 | 10 | Shaggy | None | None | None | None |
| 2269 | 667509364010450944 | NaN | NaN | 2015-11-20 01:06:48 +0000 | <a href="http://twitter.com" rel="nofollow">Tw... | This a Norwegian Pewterschmidt named Tickles. ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/667509364... | 12 | 10 | None | None | None | None | None |
| 2294 | 667138269671505920 | NaN | NaN | 2015-11-19 00:32:12 +0000 | <a href="http://twitter.com/download/iphone" r... | Extremely intelligent dog here. Has learned to... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/667138269... | 10 | 10 | None | None | None | None | None |
| 2338 | 666104133288665088 | NaN | NaN | 2015-11-16 04:02:55 +0000 | <a href="http://twitter.com/download/iphone" r... | Not familiar with this breed. No tail (weird).... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666104133... | 1 | 10 | None | None | None | None | None |
112 rows × 17 columns
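There are 181 non-null retweeted_status_id values, but only 112 of them show up among the tweet_ids. Presumably the others are retweets of tweets outside the time period of this selection, or of tweets that were left out of this selection in some other way. Floating-point rounding is another likely contributor: retweeted_status_id is held as a float, so 18-digit ids can be rounded before the cast to Int64 (for example, retweet_list contains 816450570814898176, while the status URL for that tweet in expanded_urls ends in 816450570814898180), and a rounded id cannot equal the exact tweet_id. Let's examine a pair of a tweet and retweet below.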
# Select the tweet with this id together with any row recorded as a retweet of it.
df_archive.query('tweet_id == 878281511006478336 or retweeted_status_id == 878281511006478336')
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 73 | 878404777348136964 | NaN | NaN | 2017-06-24 00:09:53 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @dog_rates: Meet Shadow. In an attempt to r... | 8.782815e+17 | 4.196984e+09 | 2017-06-23 16:00:04 +0000 | https://www.gofundme.com/3yd6y1c,https://twitt... | 13 | 10 | Shadow | None | None | None | None |
| 75 | 878281511006478336 | NaN | NaN | 2017-06-23 16:00:04 +0000 | <a href="http://twitter.com/download/iphone" r... | Meet Shadow. In an attempt to reach maximum zo... | NaN | NaN | NaN | https://www.gofundme.com/3yd6y1c,https://twitt... | 13 | 10 | Shadow | None | None | None | None |
Index number 75 is the tweet and index number 73 is the retweet. Notice that the timestamp of the tweet is the same as the retweeted_status_timestamp, as expected. Plus, as expected, expanded_urls look to be the same. Also notice that the retweet begins with "RT @dog_rates:" and then repeats the text of the tweet. Can we find all the retweets not only by looking for retweeted_status_id, but also using the text variable?
# str.match anchors at the start of each string, so this counts texts that begin with "RT".
pattern = r"RT"
df_archive.text.str.match(pattern).sum()
183
# Matching "RT" at the start of the text gave two more hits than there are retweet ids.
# Show the rows whose text starts with the pattern but have no retweeted_status_id.
df_archive.loc[df_archive.text.str.match(pattern) & df_archive.retweeted_status_id.isna()]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1286 | 708400866336894977 | NaN | NaN | 2016-03-11 21:15:02 +0000 | <a href="http://vine.co" rel="nofollow">Vine -... | RT if you are as ready for summer as this pup ... | NaN | NaN | NaN | https://vine.co/v/iHFqnjKVbIQ | 12 | 10 | None | None | None | None | None |
| 1860 | 675489971617296384 | NaN | NaN | 2015-12-12 01:38:53 +0000 | <a href="http://twitter.com/download/iphone" r... | RT until we find this dog. Clearly a cool dog ... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/675489971... | 10 | 10 | None | None | None | None | None |
# Let's do a more precise search for "RT @dog_rates:"
# (this matches only texts that begin with a retweet of the @dog_rates account).
pattern = r"RT @dog_rates:"
df_archive.text.str.match(pattern).sum()
156
# Why did we get fewer than 181? Show the rows that have a retweeted_status_id
# but whose text does not start with the current pattern.
df_archive.loc[df_archive.retweeted_status_id.notnull() & ~df_archive.text.str.match(pattern)]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 32 | 886054160059072513 | NaN | NaN | 2017-07-15 02:45:48 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @Athletics: 12/10 #BATP https://t.co/WxwJmv... | 8.860537e+17 | 1.960740e+07 | 2017-07-15 02:44:07 +0000 | https://twitter.com/dog_rates/status/886053434... | 12 | 10 | None | None | None | None | None |
| 78 | 877611172832227328 | NaN | NaN | 2017-06-21 19:36:23 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @rachel2195: @dog_rates the boyfriend and h... | 8.768508e+17 | 5.128045e+08 | 2017-06-19 17:14:49 +0000 | https://twitter.com/rachel2195/status/87685077... | 14 | 10 | None | None | None | pupper | None |
| 101 | 872668790621863937 | NaN | NaN | 2017-06-08 04:17:07 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @loganamnosis: Penelope here is doing me qu... | 8.726576e+17 | 1.547674e+08 | 2017-06-08 03:32:35 +0000 | https://twitter.com/loganamnosis/status/872657... | 14 | 10 | None | None | None | None | None |
| 130 | 867072653475098625 | NaN | NaN | 2017-05-23 17:40:04 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @rachaeleasler: these @dog_rates hats are 1... | 8.650134e+17 | 7.874618e+17 | 2017-05-18 01:17:25 +0000 | https://twitter.com/rachaeleasler/status/86501... | 13 | 10 | None | None | None | None | None |
| 160 | 860924035999428608 | NaN | NaN | 2017-05-06 18:27:40 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @tallylott: h*ckin adorable promposal. 13/1... | 8.609145e+17 | 3.638908e+08 | 2017-05-06 17:49:42 +0000 | https://twitter.com/tallylott/status/860914485... | 13 | 10 | None | None | None | None | None |
| 180 | 857062103051644929 | NaN | NaN | 2017-04-26 02:41:43 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @AaronChewning: First time wearing my @dog_... | 8.570611e+17 | 5.870972e+07 | 2017-04-26 02:37:47 +0000 | https://twitter.com/AaronChewning/status/85706... | 13 | 10 | None | None | None | None | None |
| 185 | 856330835276025856 | NaN | NaN | 2017-04-24 02:15:55 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @Jenna_Marbles: @dog_rates Thanks for ratin... | 8.563302e+17 | 6.669901e+07 | 2017-04-24 02:13:14 +0000 | NaN | 14 | 10 | None | None | None | None | None |
| 195 | 855138241867124737 | NaN | NaN | 2017-04-20 19:16:59 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @frasercampbell_: oh my... what's that... b... | 8.551225e+17 | 7.475543e+17 | 2017-04-20 18:14:33 +0000 | https://twitter.com/frasercampbell_/status/855... | 14 | 10 | None | None | None | None | None |
| 212 | 851861385021730816 | NaN | NaN | 2017-04-11 18:15:55 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @eddie_coe98: Thanks @dog_rates completed m... | 8.482894e+17 | 3.410211e+08 | 2017-04-01 21:42:03 +0000 | https://twitter.com/eddie_coe98/status/8482893... | 10 | 10 | None | None | None | None | None |
| 231 | 847971574464610304 | NaN | NaN | 2017-04-01 00:39:12 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @basic_vacek_: I love my new mug easy 13/10... | 8.479710e+17 | 5.970642e+08 | 2017-04-01 00:36:55 +0000 | https://twitter.com/basic_vacek_/status/847971... | 13 | 10 | None | None | None | None | None |
| 281 | 839290600511926273 | NaN | NaN | 2017-03-08 01:44:07 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @alexmartindawg: THE DRINK IS DR. PUPPER 10... | 8.392899e+17 | 4.119842e+07 | 2017-03-08 01:41:24 +0000 | https://twitter.com/alexmartindawg/status/8392... | 10 | 10 | None | None | None | pupper | None |
| 285 | 838916489579200512 | NaN | NaN | 2017-03-07 00:57:32 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @KibaDva: I collected all the good dogs!! 1... | 8.389060e+17 | 8.117408e+08 | 2017-03-07 00:15:46 +0000 | https://twitter.com/KibaDva/status/83890598062... | 15 | 10 | None | None | None | None | None |
| 298 | 837012587749474308 | NaN | NaN | 2017-03-01 18:52:06 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @KennyFromDaBlok: 14/10 h*ckin good hats. w... | 8.370113e+17 | 7.266347e+08 | 2017-03-01 18:47:10 +0000 | https://twitter.com/KennyFromDaBlok/status/837... | 14 | 10 | None | None | None | None | None |
| 302 | 836648853927522308 | NaN | NaN | 2017-02-28 18:46:45 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @SchafeBacon2016: @dog_rates Slightly distu... | 8.366481e+17 | 7.124572e+17 | 2017-02-28 18:43:57 +0000 | https://twitter.com/SchafeBacon2016/status/836... | 11 | 10 | None | None | None | None | None |
| 327 | 833732339549220864 | NaN | NaN | 2017-02-20 17:37:34 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @rolltidered: This is Gabby. Now requests t... | 8.324344e+17 | 4.466750e+07 | 2017-02-17 03:39:51 +0000 | https://twitter.com/rolltidered/status/8324343... | 12 | 10 | Gabby | None | None | None | None |
| 332 | 832769181346996225 | NaN | NaN | 2017-02-18 01:50:19 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @EmilieGambril: 12/10 h*cking excited about... | 8.327664e+17 | 4.871977e+08 | 2017-02-18 01:39:12 +0000 | https://twitter.com/EmilieGambril/status/83276... | 12 | 10 | None | None | None | None | None |
| 568 | 802185808107208704 | NaN | NaN | 2016-11-25 16:22:55 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @ChinoChinako: They're good products, Brent... | 8.000650e+17 | 2.488557e+07 | 2016-11-19 19:55:41 +0000 | https://twitter.com/ChinoChinako/status/800065... | 13 | 10 | None | None | None | None | None |
| 577 | 800855607700029440 | NaN | NaN | 2016-11-22 00:17:10 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @Lin_Manuel: 11/10 would recommend. https:/... | 8.008540e+17 | 7.992370e+07 | 2016-11-22 00:10:52 +0000 | https://twitter.com/littlewiewel/status/800852... | 11 | 10 | None | None | None | None | None |
| 742 | 780476555013349377 | NaN | NaN | 2016-09-26 18:38:05 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @Patreon: Well. @dog_rates is on Patreon. \... | 7.804657e+17 | 1.228326e+09 | 2016-09-26 17:55:00 +0000 | https://www.patreon.com/WeRateDogs,https://twi... | 12 | 10 | None | None | None | None | None |
| 815 | 771004394259247104 | NaN | NaN | 2016-08-31 15:19:06 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @katieornah: @dog_rates learning a lot at c... | 7.710021e+17 | 1.732729e+09 | 2016-08-31 15:10:07 +0000 | https://twitter.com/katieornah/status/77100213... | 12 | 10 | None | None | None | pupper | None |
| 885 | 760153949710192640 | NaN | NaN | 2016-08-01 16:43:19 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @hownottodraw: The story/person behind @dog... | 7.601538e+17 | 1.950368e+08 | 2016-08-01 16:42:51 +0000 | https://weratedogs.com/pages/about-us,https://... | 11 | 10 | None | None | None | None | None |
| 911 | 757597904299253760 | NaN | NaN | 2016-07-25 15:26:30 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @jon_hill987: @dog_rates There is a cunning... | 7.575971e+17 | 2.804798e+08 | 2016-07-25 15:23:28 +0000 | https://twitter.com/jon_hill987/status/7575971... | 11 | 10 | None | None | None | pupper | None |
| 1242 | 711998809858043904 | NaN | NaN | 2016-03-21 19:31:59 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @twitter: @dog_rates Awesome Tweet! 12/10. ... | 7.119983e+17 | 7.832140e+05 | 2016-03-21 19:29:52 +0000 | https://twitter.com/twitter/status/71199827977... | 12 | 10 | None | None | None | None | None |
| 2259 | 667550904950915073 | NaN | NaN | 2015-11-20 03:51:52 +0000 | <a href="http://twitter.com" rel="nofollow">Tw... | RT @dogratingrating: Exceptional talent. Origi... | 6.675487e+17 | 4.296832e+09 | 2015-11-20 03:43:06 +0000 | https://twitter.com/dogratingrating/status/667... | 12 | 10 | None | None | None | None | None |
| 2260 | 667550882905632768 | NaN | NaN | 2015-11-20 03:51:47 +0000 | <a href="http://twitter.com" rel="nofollow">Tw... | RT @dogratingrating: Unoriginal idea. Blatant ... | 6.675484e+17 | 4.296832e+09 | 2015-11-20 03:41:59 +0000 | https://twitter.com/dogratingrating/status/667... | 5 | 10 | None | None | None | None | None |
# The earlier pattern matched too much; anchor on the literal prefix "RT @".
pattern = r"RT @"
starts_with_rt = df_archive["text"].str.match(pattern)
starts_with_rt.sum()
181
# Are there any duplicates in `expanded_urls`?
# Flag every row whose expanded_urls value occurs more than once, then
# count how often each repeated value appears.
has_repeated_url = df_archive["expanded_urls"].duplicated(keep=False)
df2 = df_archive.loc[has_repeated_url]
df2["expanded_urls"].value_counts()
https://twitter.com/dog_rates/status/816450570814898180/photo/1,https://twitter.com/dog_rates/status/816450570814898180/photo/1 2
https://twitter.com/dog_rates/status/859196978902773760/video/1 2
https://twitter.com/dog_rates/status/681523177663676416/photo/1 2
https://www.gofundme.com/lolas-life-saving-surgery-funds,https://twitter.com/dog_rates/status/835264098648616962/photo/1,https://twitter.com/dog_rates/status/835264098648616962/photo/1 2
https://twitter.com/dog_rates/status/667138269671505920/photo/1 2
..
https://twitter.com/dog_rates/status/775733305207554048/photo/1 2
https://www.gofundme.com/my-puppys-double-cataract-surgery,https://twitter.com/dog_rates/status/825026590719483904/photo/1,https://twitter.com/dog_rates/status/825026590719483904/photo/1 2
https://twitter.com/dog_rates/status/844704788403113984/photo/1 2
https://www.gofundme.com/help-my-baby-sierra-get-better,https://twitter.com/dog_rates/status/873213775632977920/photo/1,https://twitter.com/dog_rates/status/873213775632977920/photo/1 2
https://twitter.com/dog_rates/status/771380798096281600/photo/1,https://twitter.com/dog_rates/status/771380798096281600/photo/1,https://twitter.com/dog_rates/status/771380798096281600/photo/1,https://twitter.com/dog_rates/status/771380798096281600/photo/1 2
Name: expanded_urls, Length: 79, dtype: int64
# Show the first ten expanded_urls values next to their row positions.
# Iterating over head(10) avoids positional indexing and cannot raise
# IndexError when the frame holds fewer than ten rows.
for i, urls in enumerate(df_archive.expanded_urls.head(10)):
    print(i, urls)
0 https://twitter.com/dog_rates/status/892420643555336193/photo/1 1 https://twitter.com/dog_rates/status/892177421306343426/photo/1 2 https://twitter.com/dog_rates/status/891815181378084864/photo/1 3 https://twitter.com/dog_rates/status/891689557279858688/photo/1 4 https://twitter.com/dog_rates/status/891327558926688256/photo/1,https://twitter.com/dog_rates/status/891327558926688256/photo/1 5 https://twitter.com/dog_rates/status/891087950875897856/photo/1 6 https://gofundme.com/ydvmve-surgery-for-jax,https://twitter.com/dog_rates/status/890971913173991426/photo/1 7 https://twitter.com/dog_rates/status/890729181411237888/photo/1,https://twitter.com/dog_rates/status/890729181411237888/photo/1 8 https://twitter.com/dog_rates/status/890609185150312448/photo/1 9 https://twitter.com/dog_rates/status/890240255349198849/photo/1
expanded_urls contains duplicated values, not including NaN's. Is that a problem?
expanded_urls sometimes holds more than one url, separated by commas. This defeats the ability to just click on the value to go to the webpage. Examples above: rows 4, 6, 7. Sometimes the second url is just a repeat of the first (as in rows 4 and 7). Sometimes the two urls are different (as in row 6).
# Looking at cases without expanded_urls
# Boolean mask of the rows whose expanded_urls is missing.
mask = df_archive.expanded_urls.isnull()
print('There are {} cases without expanded urls.'.format(mask.sum()))
print()
# Print index and full tweet text for each of those rows.
# Series.iteritems() was removed in pandas 2.0; items() is the supported name.
for index, text in df_archive.loc[mask, 'text'].items():
    print(index, text)
There are 59 cases without expanded urls. 30 @NonWhiteHat @MayhewMayhem omg hello tanner you are a scary good boy 12/10 would pet with extreme caution 55 @roushfenway These are good dogs but 17/10 is an emotional impulse rating. More like 13/10s 64 @RealKentMurphy 14/10 confirmed 113 @ComplicitOwl @ShopWeRateDogs >10/10 is reserved for dogs 148 @Jack_Septic_Eye I'd need a few more pics to polish a full analysis, but based on the good boy content above I'm leaning towards 12/10 179 @Marc_IRL pixelated af 12/10 185 RT @Jenna_Marbles: @dog_rates Thanks for rating my cermets 14/10 wow I'm so proud I watered them so much 186 @xianmcguire @Jenna_Marbles Kardashians wouldn't be famous if as a society we didn't place enormous value on what they do. The dogs are very deserving of their 14/10 188 @dhmontgomery We also gave snoop dogg a 420/10 but I think that predated your research 189 @s8n You tried very hard to portray this good boy as not so good, but you have ultimately failed. His goodness shines through. 666/10 218 @markhoppus MARK THAT DOG HAS SEEN AND EXPERIENCED MANY THINGS. PROBABLY LOST OTHER EAR DOING SOMETHING HEROIC. 13/10 HUG THE DOG HOPPUS 228 Jerry just apuppologized to me. He said there was no ill-intent to the slippage. I overreacted I admit. Pupgraded to an 11/10 would pet 234 .@breaannanicolee PUPDATE: Cannon has a heart on his nose. 
Pupgraded to a 13/10 274 @0_kelvin_0 >10/10 is reserved for puppos sorry Kevin 290 @markhoppus 182/10 291 @bragg6of8 @Andy_Pace_ we are still looking for the first 15/10 313 @jonnysun @Lin_Manuel ok jomny I know you're excited but 960/00 isn't a valid rating, 13/10 is tho 342 @docmisterio account started on 11/15/15 346 @UNC can confirm 12/10 375 Beebop and Doobert should start a band 12/10 would listen 387 I was going to do 007/10, but the joke wasn't worth the <10 rating 409 @HistoryInPics 13/10 427 @imgur for a polar bear tho I'd say 13/10 is appropriate 498 I've been informed by multiple sources that this is actually a dog elf who's tired from helping Santa all night. Pupgraded to 12/10 513 PUPDATE: I've been informed that Augie was actually bringing his family these flowers when he tripped. Very good boy. Pupgraded to 11/10 570 .@NBCSports OMG THE TINY HAT I'M GOING TO HAVE TO SAY 11/10 NBC 576 @SkyWilliams doggo simply protecting you from evil that which you cannot see. 11/10 would give extra pets 611 @JODYHiGHROLLER it may be an 11/10 but what do I know 😉 701 13/10 for breakdancing puppo @shibbnbot 707 Today, 10/10, should be National Dog Rates Day 843 His name is Charley and he already has a new set of wheels thanks to donations. I heard his top speed was also increased. 13/10 for Charley 857 @TheEllenShow I'm not sure if you know this but that doggo right there is a 12/10 967 13/10 such a good doggo @spaghemily 1005 Other pupper asked not to have his identity shared. Probably just embarrassed about the headbutt. Also 12/10 it'll be ok mystery pup 1080 @mount_alex3 13/10 1295 @serial @MrRoles OH MY GOD I listened to all of season 1 during a single road trip. I love you guys! I can confirm Bernie's 12/10 rating :) 1345 13/10 hero af @ABC 1445 Oh my god 10/10 for every little hot dog pupper 1446 After reading the comments I may have overestimated this pup. Downgraded to a 1/10. Please forgive me 1474 BREAKING PUPDATE: I've just been notified that (if in U.S.) 
this dog appears to be operating the vehicle. Upgraded to 10/10. Skilled af 1479 Personally I'd give him an 11/10. Not sure why you think you're qualified to rate such a stellar pup. @CommonWhiteGirI 1497 PUPDATE: just noticed this dog has some extra legs. Very advanced. Revolutionary af. Upgraded to a 9/10 1523 12/10 @LightningHoltt 1598 Yes I do realize a rating of 4/20 would've been fitting. However, it would be unjust to give these cooperative pups that low of a rating 1605 Jack deserves another round of applause. If you missed this earlier today I strongly suggest reading it. Wonderful first 14/10 🐶❤️ 1618 For those who claim this is a goat, u are wrong. It is not the Greatest Of All Time. The rating of 5/10 should have made that clear. Thank u 1663 I'm aware that I could've said 20/16, but here at WeRateDogs we are very professional. An inconsistent rating scale is simply irresponsible 1689 I've been told there's a slight possibility he's checking his mirror. We'll bump to 9.5/10. Still a menace 1774 After getting lost in Reese's eyes for several minutes we're going to upgrade him to a 13/10 1819 After some outrage from the crowd. Bubbles is being upgraded to a 7/10. That's as high as I'm going. Thank you 1844 This dog is being demoted to a 9/10 for not wearing a helmet while riding. Gotta stay safe out there. Thank you 1895 Some clarification is required. The dog is singing Cher and that is more than worthy of an 11/10. Thank you 1905 The 13/10 also takes into account this impeccable yard. Louis is great but the future dad in me can't ignore that luscious green grass 1914 13/10 @ABC7 1940 The millennials have spoken and we've decided to immediately demote to a 1/10. Thank you 2038 After 22 minutes of careful deliberation this dog is being demoted to a 1/10. 
The longer you look at him the more terrifying he becomes 2149 After countless hours of research and hundreds of formula alterations we have concluded that Dug should be bumped to an 11/10 2189 12/10 good shit Bubka @wane15 2298 After much debate this dog is being upgraded to 10/10. I repeat 10/10
# There are 59 cases without expanded_urls. So, no picture.
# Summarise the columns of just those rows.
rows_without_url = df_archive.loc[mask]
rows_without_url.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 59 entries, 30 to 2298 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 59 non-null int64 1 in_reply_to_status_id 55 non-null float64 2 in_reply_to_user_id 55 non-null float64 3 timestamp 59 non-null object 4 source 59 non-null object 5 text 59 non-null object 6 retweeted_status_id 1 non-null float64 7 retweeted_status_user_id 1 non-null float64 8 retweeted_status_timestamp 1 non-null object 9 expanded_urls 0 non-null object 10 rating_numerator 59 non-null int64 11 rating_denominator 59 non-null int64 12 name 59 non-null object 13 doggo 59 non-null object 14 floofer 59 non-null object 15 pupper 59 non-null object 16 puppo 59 non-null object dtypes: float64(4), int64(3), object(10) memory usage: 8.3+ KB
Most of the cases without expanded urls (i.e., the 59 without pictures) are replies (55). Only 4 are not replies (their in_reply_to_status_id is null). The comments are interesting though because they reveal that sometimes ratings get corrected/upgraded, or as they say, "pupgraded".
# Rows that have no expanded_urls and are also not replies.
not_a_reply = df_archive["in_reply_to_status_id"].isna()
df_archive.loc[mask & not_a_reply]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 185 | 856330835276025856 | NaN | NaN | 2017-04-24 02:15:55 +0000 | <a href="http://twitter.com/download/iphone" r... | RT @Jenna_Marbles: @dog_rates Thanks for ratin... | 8.563302e+17 | 66699013.0 | 2017-04-24 02:13:14 +0000 | NaN | 14 | 10 | None | None | None | None | None |
| 375 | 828361771580813312 | NaN | NaN | 2017-02-05 21:56:51 +0000 | <a href="http://twitter.com" rel="nofollow">Tw... | Beebop and Doobert should start a band 12/10 w... | NaN | NaN | NaN | NaN | 12 | 10 | None | None | None | None | None |
| 707 | 785515384317313025 | NaN | NaN | 2016-10-10 16:20:36 +0000 | <a href="http://twitter.com/download/iphone" r... | Today, 10/10, should be National Dog Rates Day | NaN | NaN | NaN | NaN | 10 | 10 | None | None | None | None | None |
| 1445 | 696518437233913856 | NaN | NaN | 2016-02-08 02:18:30 +0000 | <a href="http://twitter.com/download/iphone" r... | Oh my god 10/10 for every little hot dog pupper | NaN | NaN | NaN | NaN | 10 | 10 | None | None | None | pupper | None |
These 4 cases should definitely be dropped. The first is a retweet. The other three do not seem to be about a specific dog picture.
# How often does each rating numerator occur?
df_archive["rating_numerator"].value_counts()
12 558 11 464 10 461 13 351 9 158 8 102 7 55 14 54 5 37 6 32 3 19 4 17 1 9 2 9 0 2 15 2 75 2 420 2 182 1 204 1 143 1 121 1 99 1 20 1 45 1 27 1 17 1 24 1 26 1 44 1 50 1 60 1 80 1 84 1 88 1 1776 1 960 1 666 1 144 1 165 1 Name: rating_numerator, dtype: int64
# How often does each rating denominator occur?
df_archive["rating_denominator"].value_counts()
10 2333 11 3 50 3 20 2 80 2 0 1 120 1 7 1 170 1 150 1 130 1 90 1 110 1 2 1 70 1 40 1 16 1 15 1 Name: rating_denominator, dtype: int64
# Summary statistics for the two rating columns.
rating_columns = ['rating_numerator', 'rating_denominator']
df_archive.loc[:, rating_columns].describe()
| rating_numerator | rating_denominator | |
|---|---|---|
| count | 2356.000000 | 2356.000000 |
| mean | 13.126486 | 10.455433 |
| std | 45.876648 | 6.745237 |
| min | 0.000000 | 0.000000 |
| 25% | 10.000000 | 10.000000 |
| 50% | 11.000000 | 10.000000 |
| 75% | 12.000000 | 10.000000 |
| max | 1776.000000 | 170.000000 |
rating_denominator has implausible values in some cases (e.g., 50, 80, 170, 0, etc.).
rating_numerator has implausible values in some cases (e.g., 420, 75, 80, etc.).
df_archive.name.value_counts()[0:20]
None 745 a 55 Charlie 12 Cooper 11 Oliver 11 Lucy 11 Lola 10 Tucker 10 Penny 10 Bo 9 Winston 9 Sadie 8 the 8 an 7 Toby 7 Bailey 7 Daisy 7 Buddy 7 Dave 6 Leo 6 Name: name, dtype: int64
name column contains values that are not names: 'None', 'a', 'the', 'an', ...
doggo, floofer, pupper, puppo
# What are the ranges of values these variables take?
# Print the value distribution of each designation column, with a blank
# line between consecutive columns.
for position, column in enumerate(['doggo', 'floofer', 'pupper', 'puppo']):
    if position:
        print()
    print(df_archive[column].value_counts())
None 2259 doggo 97 Name: doggo, dtype: int64 None 2346 floofer 10 Name: floofer, dtype: int64 None 2099 pupper 257 Name: pupper, dtype: int64 None 2326 puppo 30 Name: puppo, dtype: int64
# Are these attributes mutually exclusive?
# Count every observed combination of the four designation columns.
designation_columns = ['doggo', 'floofer', 'pupper', 'puppo']
df_archive.loc[:, designation_columns].value_counts(sort=False)
doggo floofer pupper puppo
None None None None 1976
puppo 29
pupper None 245
floofer None None 9
doggo None None None 83
puppo 1
pupper None 12
floofer None None 1
dtype: int64
No, they are not mutually exclusive. Sometimes a "doggo" is also called a "puppo" (1 case). Likewise for the pairs, "doggo" and "pupper" (12 cases), "doggo" and "floofer" (1 case).
# Examine the case with both "doggo" and "puppo"
is_doggo_and_puppo = (df_archive['doggo'] == "doggo") & (df_archive['puppo'] == "puppo")
df_archive.loc[is_doggo_and_puppo]
| tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 191 | 855851453814013952 | NaN | NaN | 2017-04-22 18:31:02 +0000 | <a href="http://twitter.com/download/iphone" r... | Here's a puppo participating in the #ScienceMa... | NaN | NaN | NaN | https://twitter.com/dog_rates/status/855851453... | 13 | 10 | None | doggo | None | None | puppo |
# Same rows, restricted to the four designation columns.
is_doggo_and_puppo = (df_archive['doggo'] == "doggo") & (df_archive['puppo'] == "puppo")
df_archive.loc[is_doggo_and_puppo, ['doggo', 'floofer', 'pupper', 'puppo']]
| doggo | floofer | pupper | puppo | |
|---|---|---|---|---|
| 191 | doggo | None | None | puppo |
# Full text of the tweet at index label 191.
tweet_text = df_archive.at[191, 'text']
print(tweet_text)
Here's a puppo participating in the #ScienceMarch. Cleverly disguising her own doggo agenda. 13/10 would keep the planet habitable for https://t.co/cMhq16isel
This is a picture of a black lab with a sign on its back saying "Support Labs". The use of both descriptors for the same dog clearly created some confusion, leading one commenter to ask "which is it? Doggo or puppo?" However, this case illustrates that both attributes can be applied together, suggesting that these are not values of a single variable, but individual variables.
# Examine the cases with both "doggo" and "pupper"
# Row count for each (doggo, pupper) value pair.
pair_groups = df_archive.groupby(['doggo', 'pupper'])
pair_groups.size()
doggo pupper
None None 2014
pupper 245
doggo None 85
pupper 12
dtype: int64
# Rows tagged as both "doggo" and "pupper": designation columns plus text.
is_doggo_and_pupper = (df_archive['doggo'] == "doggo") & (df_archive['pupper'] == "pupper")
df_archive.loc[is_doggo_and_pupper, ['doggo', 'floofer', 'pupper', 'puppo', 'text']]
| doggo | floofer | pupper | puppo | text | |
|---|---|---|---|---|---|
| 460 | doggo | None | pupper | None | This is Dido. She's playing the lead role in "... |
| 531 | doggo | None | pupper | None | Here we have Burke (pupper) and Dexter (doggo)... |
| 565 | doggo | None | pupper | None | Like doggo, like pupper version 2. Both 11/10 ... |
| 575 | doggo | None | pupper | None | This is Bones. He's being haunted by another d... |
| 705 | doggo | None | pupper | None | This is Pinot. He's a sophisticated doggo. You... |
| 733 | doggo | None | pupper | None | Pupper butt 1, Doggo 0. Both 12/10 https://t.c... |
| 778 | doggo | None | pupper | None | RT @dog_rates: Like father (doggo), like son (... |
| 822 | doggo | None | pupper | None | RT @dog_rates: This is just downright precious... |
| 889 | doggo | None | pupper | None | Meet Maggie & Lila. Maggie is the doggo, L... |
| 956 | doggo | None | pupper | None | Please stop sending it pictures that don't eve... |
| 1063 | doggo | None | pupper | None | This is just downright precious af. 12/10 for ... |
| 1113 | doggo | None | pupper | None | Like father (doggo), like son (pupper). Both 1... |
# Print the index and full text of every tweet tagged as both "doggo" and "pupper".
# Series.iteritems() was removed in pandas 2.0; items() is the supported name.
for index, text in df_archive.query('(doggo == "doggo") and (pupper == "pupper")')['text'].items():
    print(index, text)
460 This is Dido. She's playing the lead role in "Pupper Stops to Catch Snow Before Resuming Shadow Box with Dried Apple." 13/10 (IG: didodoggo) https://t.co/m7isZrOBX7 531 Here we have Burke (pupper) and Dexter (doggo). Pupper wants to be exactly like doggo. Both 12/10 would pet at same time https://t.co/ANBpEYHaho 565 Like doggo, like pupper version 2. Both 11/10 https://t.co/9IxWAXFqze 575 This is Bones. He's being haunted by another doggo of roughly the same size. 12/10 deep breaths pupper everything's fine https://t.co/55Dqe0SJNj 705 This is Pinot. He's a sophisticated doggo. You can tell by the hat. Also pointier than your average pupper. Still 10/10 would pet cautiously https://t.co/f2wmLZTPHd 733 Pupper butt 1, Doggo 0. Both 12/10 https://t.co/WQvcPEpH2u 778 RT @dog_rates: Like father (doggo), like son (pupper). Both 12/10 https://t.co/pG2inLaOda 822 RT @dog_rates: This is just downright precious af. 12/10 for both pupper and doggo https://t.co/o5J479bZUC 889 Meet Maggie & Lila. Maggie is the doggo, Lila is the pupper. They are sisters. Both 12/10 would pet at the same time https://t.co/MYwR4DQKll 956 Please stop sending it pictures that don't even have a doggo or pupper in them. Churlish af. 5/10 neat couch tho https://t.co/u2c9c7qSg8 1063 This is just downright precious af. 12/10 for both pupper and doggo https://t.co/o5J479bZUC 1113 Like father (doggo), like son (pupper). Both 12/10 https://t.co/pG2inLaOda
460: One dog, being called both a pupper (developmental use) and a doggo (generic use).
531: Two different dogs. One a puppy and the other an adult. Using "pupper" and "doggo" as developmental stages.
565: Two different dogs, one a puppy and one an adult.
575: Same dog. "doggo" being used generically.
705: Not a dog, but a hedgehog.
733: Two different dogs. Developmental use of terms.
778: Two different dogs. Developmental use.
822: Two different dogs. Developmental use.
889: Two different dogs. Developmental use.
956: One dog. But non-attributive use of terms.
1063: Same as 822
1113: Same as 778
# Examine the cases with both "doggo" and "floofer"
# Count each observed (doggo, floofer) combination.
df_archive.loc[:, ['doggo', 'floofer']].value_counts()
doggo floofer None None 2250 doggo None 96 None floofer 9 doggo floofer 1 dtype: int64
# Rows tagged as both "doggo" and "floofer", designation columns only.
is_doggo_and_floofer = (df_archive['doggo'] == "doggo") & (df_archive['floofer'] == "floofer")
df_archive.loc[is_doggo_and_floofer, ['doggo', 'floofer', 'pupper', 'puppo']]
| doggo | floofer | pupper | puppo | |
|---|---|---|---|---|
| 200 | doggo | floofer | None | None |
# Display every column of the row with index label 200 (the doggo + floofer case).
df_archive.loc[200]
tweet_id 854010172552949760 in_reply_to_status_id NaN in_reply_to_user_id NaN timestamp 2017-04-17 16:34:26 +0000 source <a href="http://twitter.com/download/iphone" r... text At first I thought this was a shy doggo, but i... retweeted_status_id NaN retweeted_status_user_id NaN retweeted_status_timestamp NaN expanded_urls https://twitter.com/dog_rates/status/854010172... rating_numerator 11 rating_denominator 10 name None doggo doggo floofer floofer pupper None puppo None Name: 200, dtype: object
# Full text of the tweet at index label 200.
tweet_text = df_archive.at[200, 'text']
print(tweet_text)
At first I thought this was a shy doggo, but it's actually a Rare Canadian Floofer Owl. Amateurs would confuse the two. 11/10 only send dogs https://t.co/TXdT3tmuYk
# First url of the comma-separated expanded_urls value for index label 200.
first_url, _, _ = df_archive.at[200, 'expanded_urls'].partition(',')
print(first_url)
https://twitter.com/dog_rates/status/854010172552949760/photo/1
Picture is of a dog in a log with only its fluffy head visible. The remark is meant to be rhetorical. So, it is truly being considered a "doggo" by implication. The designation "floofer" is being used non-attributively in jest. One might quibble about this, however.
doggo, floofer, pupper, and puppo columns should be categorical, not object.
rating_denominator has implausible values in some cases (e.g., 50, 80, 170, 0, etc.).
rating_numerator has implausible values in some cases (e.g., 420, 75, 80, etc.).
name column contains values that are not names: 'None', 'a', 'the', 'an', ...
source should be split into two variables - source_name and source_url - extracted from the HTML. source_name should be categorical and source_url should be string (or object).
expanded_urls contains duplicated values, not including NaN's. Is that a problem?
expanded_urls sometimes holds more than one url, separated by commas. This defeats the ability to just click on the value to go to the webpage. Sometimes the second url is just a repeat of the first (as in rows 4 and 7). Sometimes the two urls are different (as in row 6).
# Make copies of the data frames. 0 versions are the copies to be cleaned.
# Independent working copies; the originals stay untouched for later comparison.
df_archive0, df_api0, df_predictions0 = (
    frame.copy() for frame in (df_archive, df_api, df_predictions)
)
Replace 'None' with np.nan in the four columns. Combine the 'doggo', 'floofer', 'pupper', and 'puppo' columns into a single variable, dog_designation, in which these designations become values of the variable. Rare exceptions with dual designations can be handled by allowing the hyphenated values 'floofer-doggo', 'puppo-doggo' and 'pupper-doggo' to stand in for the corresponding combination.
Explanation: The new column dog_designation will register the designation attributed to a dog by We Rate Dogs, when such a designation was given. When no designation was given, it shall take a null value. So, dog_designation does not signify what a dog really is, but rather what We Rate Dogs designated, if any designation was given. Second, although 'puppo', 'pupper', and 'doggo' are often used to designate mutually exclusive stages in the development of a dog (one might call this the developmental use of these terms) so that a 'puppo' is a really young dog, a 'pupper' an adolescent, and a 'doggo' an adult, the terms are not always used this way. The term 'doggo' sometimes has a generic usage, covering any dog no matter what age. With the generic usage of 'doggo' in play, something can be both a pupper and a doggo -- a "pupper-doggo". Similarly, a dog can be a 'puppo-doggo' or a 'floofer-doggo' when 'doggo' is used in the generic sense. Sometimes, however, dual designations are due to the fact that the tweet concerns a picture with two dogs in it.
# Replace the literal string 'None' with np.nan in the four designation columns.
# The result is assigned back rather than calling replace(inplace=True) on an
# attribute-accessed column: that chained in-place form is deprecated and does
# not modify the frame under pandas copy-on-write.
designation_columns = ['doggo', 'floofer', 'pupper', 'puppo']
df_archive0[designation_columns] = df_archive0[designation_columns].replace('None', np.nan)
# Create the 'dog_designation' column, initialised to NaN for every row.
# dog_value() returns this existing value for rows in which none of the four
# designation columns is set.
df_archive0['dog_designation'] = np.nan
# Create a function to generate dog_designation values from the four columns
def dog_value(x):
    """Return the combined dog designation for one row.

    Parameters
    ----------
    x : mapping-like row (e.g. the Series passed by ``DataFrame.apply(axis=1)``)
        Must provide the keys 'doggo', 'floofer', 'puppo' and 'pupper'.
        A 'dog_designation' key is used as the fallback when present.

    Returns
    -------
    The single designation when only one is set; '<other>-doggo' when
    'doggo' occurs together with another designation (the other term is
    picked in the order floofer, puppo, pupper); otherwise the row's
    existing 'dog_designation' value, or NaN when that key is absent.
    """
    # First non-null designation other than 'doggo', or None if there is none.
    other = next(
        (x[col] for col in ('floofer', 'puppo', 'pupper') if pd.notnull(x[col])),
        None,
    )
    if pd.notnull(x['doggo']):
        if other is None:
            return x['doggo']
        return other + '-' + x['doggo']
    if other is not None:
        return other
    # No designation at all: keep whatever the row already holds. Falling back
    # to NaN means the function no longer requires the column to be pre-created.
    try:
        return x['dog_designation']
    except KeyError:
        return np.nan
# Compute the designation row by row, then eyeball five random designated rows
# next to the four source columns.
df_archive0['dog_designation'] = df_archive0.apply(dog_value, axis='columns')
preview_columns = ['tweet_id', 'doggo', 'floofer', 'puppo', 'pupper', 'dog_designation']
designated_rows = df_archive0.loc[df_archive0['dog_designation'].notna()]
designated_rows.sample(5)[preview_columns]
| tweet_id | doggo | floofer | puppo | pupper | dog_designation | |
|---|---|---|---|---|---|---|
| 339 | 832273440279240704 | doggo | NaN | NaN | NaN | doggo |
| 323 | 834089966724603904 | doggo | NaN | NaN | NaN | doggo |
| 944 | 752682090207055872 | doggo | NaN | NaN | NaN | doggo |
| 1653 | 683462770029932544 | NaN | NaN | NaN | pupper | pupper |
| 220 | 850019790995546112 | NaN | NaN | NaN | pupper | pupper |
# Clean up: the four source columns are now redundant.
df_archive0.drop(columns=['doggo', 'floofer', 'puppo', 'pupper'], inplace=True)
df_archive0['dog_designation'] = df_archive0['dog_designation'].astype('category')
# Count each designation once and reuse the result for both the total and the display.
designation_counts = df_archive0['dog_designation'].value_counts()
print('The number of records with non-null values for dog_designation is:')
print(designation_counts.sum())
designation_counts
The number of records with non-null values for dog_designation is: 380
pupper 245 doggo 83 puppo 29 pupper-doggo 12 floofer 9 floofer-doggo 1 puppo-doggo 1 Name: dog_designation, dtype: int64
# Check for match with the original on total and distribution
designation_columns = ['doggo', 'floofer', 'pupper', 'puppo']
print('The number of original records with non-null designations in the four categories is:')
# Count rows carrying at least one designation directly. Slicing
# value_counts()[1:] only works while the all-'None' combination happens to be
# the most frequent one and therefore sorts first.
print((df_archive[designation_columns] != 'None').any(axis=1).sum())
df_archive[designation_columns].value_counts()
The number of original records with non-null designations in the four categories is: 380
doggo floofer pupper puppo
None None None None 1976
pupper None 245
doggo None None None 83
None None None puppo 29
doggo None pupper None 12
None floofer None None 9
doggo None None puppo 1
floofer None None 1
dtype: int64
# Visual column check
# Select the columns by name; positional indices (np.r_[0, 10:14]) silently
# pick different columns as soon as the column order changes.
check_columns = ['tweet_id', 'rating_numerator', 'rating_denominator', 'name', 'dog_designation']
df_archive0.loc[df_archive0.dog_designation.notnull(), check_columns].head()
| tweet_id | rating_numerator | rating_denominator | name | dog_designation | |
|---|---|---|---|---|---|
| 9 | 890240255349198849 | 14 | 10 | Cassie | doggo |
| 12 | 889665388333682689 | 13 | 10 | None | puppo |
| 14 | 889531135344209921 | 13 | 10 | Stuart | puppo |
| 29 | 886366144734445568 | 12 | 10 | Roscoe | pupper |
| 43 | 884162670584377345 | 12 | 10 | Yogi | doggo |
df_api with df_archive¶Merge the two dataframes.
# Left-join the API fields onto the archive by tweet_id, then inspect the result.
df_archive0 = pd.merge(df_archive0, df_api0, on='tweet_id', how='left')
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2356 entries, 0 to 2355 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2356 non-null int64 1 in_reply_to_status_id 78 non-null float64 2 in_reply_to_user_id 78 non-null float64 3 timestamp 2356 non-null object 4 source 2356 non-null object 5 text 2356 non-null object 6 retweeted_status_id 181 non-null float64 7 retweeted_status_user_id 181 non-null float64 8 retweeted_status_timestamp 181 non-null object 9 expanded_urls 2297 non-null object 10 rating_numerator 2356 non-null int64 11 rating_denominator 2356 non-null int64 12 name 2356 non-null object 13 dog_designation 380 non-null category 14 favorite_count 2331 non-null float64 15 retweet_count 2331 non-null float64 16 user_id 2331 non-null float64 17 user_name 2331 non-null object 18 user_url 2331 non-null object dtypes: category(1), float64(7), int64(3), object(8) memory usage: 352.4+ KB
New Cleaning Issue: favorite_count and retweet_count should be int, not float64
Change favorite_count and retweet_count to Int64.
# Convert columns to the best possible dtypes that support pd.NA. This lets
# columns that hold missing values, such as favorite_count and retweet_count,
# use the nullable Int64 dtype instead of float64.
df_archive0 = df_archive0.convert_dtypes()
# Confirm the resulting dtypes.
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2356 entries, 0 to 2355 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2356 non-null Int64 1 in_reply_to_status_id 78 non-null Int64 2 in_reply_to_user_id 78 non-null Int64 3 timestamp 2356 non-null string 4 source 2356 non-null string 5 text 2356 non-null string 6 retweeted_status_id 181 non-null Int64 7 retweeted_status_user_id 181 non-null Int64 8 retweeted_status_timestamp 181 non-null string 9 expanded_urls 2297 non-null string 10 rating_numerator 2356 non-null Int64 11 rating_denominator 2356 non-null Int64 12 name 2356 non-null string 13 dog_designation 380 non-null category 14 favorite_count 2331 non-null Int64 15 retweet_count 2331 non-null Int64 16 user_id 2331 non-null Int64 17 user_name 2331 non-null string 18 user_url 2331 non-null string dtypes: Int64(10), category(1), string(8) memory usage: 375.4 KB
Change timestamp and retweeted_status_timestamp to datetime. Change all "_id" columns to string.
# Parse both timestamp columns as datetimes.
for column in ('timestamp', 'retweeted_status_timestamp'):
    df_archive0[column] = pd.to_datetime(df_archive0[column])

# Store every "_id" column as a string. All conversions read from df_archive0:
# taking the reply/retweet ids from the raw df_archive would stringify its
# float64 values (e.g. '8.56e+17') and rely on the two frames sharing an index.
# NOTE(review): the reply/retweet ids were loaded as float64, so digits beyond
# float precision may already be lost; re-read the CSV with dtype=str for
# those columns if exact ids matter.
id_columns = [
    'tweet_id',
    'in_reply_to_status_id',
    'in_reply_to_user_id',
    'retweeted_status_id',
    'retweeted_status_user_id',
    'user_id',
]
for column in id_columns:
    df_archive0[column] = df_archive0[column].astype('string')
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2356 entries, 0 to 2355 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2356 non-null string 1 in_reply_to_status_id 78 non-null string 2 in_reply_to_user_id 78 non-null string 3 timestamp 2356 non-null datetime64[ns, UTC] 4 source 2356 non-null string 5 text 2356 non-null string 6 retweeted_status_id 181 non-null string 7 retweeted_status_user_id 181 non-null string 8 retweeted_status_timestamp 181 non-null datetime64[ns, UTC] 9 expanded_urls 2297 non-null string 10 rating_numerator 2356 non-null Int64 11 rating_denominator 2356 non-null Int64 12 name 2356 non-null string 13 dog_designation 380 non-null category 14 favorite_count 2331 non-null Int64 15 retweet_count 2331 non-null Int64 16 user_id 2331 non-null string 17 user_name 2331 non-null string 18 user_url 2331 non-null string dtypes: Int64(4), category(1), datetime64[ns, UTC](2), string(12) memory usage: 361.6 KB
source should be split into two variables - source_name and source_url - extracted from the HTML. source_name should be categorical and source_url should be string (or object).
Extract source_name and source_url from the HTML. Assign the values to the appropriate new columns. Change source_name to categorical. source_url should already be string (or object). Drop source column.
# Pull the link target and the visible label out of the <a ...> HTML snippet.
# The groups are named after the columns they fill, so the assignment below
# matches by name rather than relying on positional order.
pattern = r'"(?P<source_url>http\S+)".+>(?P<source_name>.+)<'
extracted_source = df_archive0.source.str.extract(pattern)
df_archive0[['source_url', 'source_name']] = extracted_source[['source_url', 'source_name']]
df_archive0.source_name = df_archive0.source_name.astype('category')
df_archive0[['tweet_id', 'source', 'source_url', 'source_name']].head()
| tweet_id | source | source_url | source_name | |
|---|---|---|---|---|
| 0 | 892420643555336193 | <a href="http://twitter.com/download/iphone" r... | http://twitter.com/download/iphone | Twitter for iPhone |
| 1 | 892177421306343426 | <a href="http://twitter.com/download/iphone" r... | http://twitter.com/download/iphone | Twitter for iPhone |
| 2 | 891815181378084864 | <a href="http://twitter.com/download/iphone" r... | http://twitter.com/download/iphone | Twitter for iPhone |
| 3 | 891689557279858688 | <a href="http://twitter.com/download/iphone" r... | http://twitter.com/download/iphone | Twitter for iPhone |
| 4 | 891327558926688256 | <a href="http://twitter.com/download/iphone" r... | http://twitter.com/download/iphone | Twitter for iPhone |
# Clean up: remove the raw HTML column now that it has been split.
del df_archive0['source']
# Check that source is gone (replaced by source_name and source_url).
'source' in df_archive0.columns
False
# Check: the distribution is the same as the original
df_archive0['source_name'].value_counts()
Twitter for iPhone 2221 Vine - Make a Scene 91 Twitter Web Client 33 TweetDeck 11 Name: source_name, dtype: int64
# Reference counts from the untouched archive, for comparison with the extraction above
df_archive['source'].value_counts()
<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a> 2221 <a href="http://vine.co" rel="nofollow">Vine - Make a Scene</a> 91 <a href="http://twitter.com" rel="nofollow">Twitter Web Client</a> 33 <a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">TweetDeck</a> 11 Name: source, dtype: int64
expanded_urls sometimes has more than one url, separating them by a comma. This defeats the ability to just click on them to go to the webpage. Sometimes the second url is just a repeat of the first (as in rows 4 and 7). Sometimes the two urls are different (as in row 6).
Split expanded_urls into two columns -- url_1 and url_2. Where the urls are the same, only record one of them, leaving a null value for url_2.
# Turn column of strings into column of lists of urls, removing duplicates.
# dict.fromkeys() de-duplicates while keeping first-seen order, so the split
# into url_1 / url_2 is reproducible. list(set(...)) ordered the urls by
# string hash, which changes from one interpreter session to the next.
s_urls = df_archive0.expanded_urls.str.split(pat=',').map(
    lambda x: list(dict.fromkeys(x)), na_action='ignore')
s_urls
0 [https://twitter.com/dog_rates/status/89242064...
1 [https://twitter.com/dog_rates/status/89217742...
2 [https://twitter.com/dog_rates/status/89181518...
3 [https://twitter.com/dog_rates/status/89168955...
4 [https://twitter.com/dog_rates/status/89132755...
...
2351 [https://twitter.com/dog_rates/status/66604924...
2352 [https://twitter.com/dog_rates/status/66604422...
2353 [https://twitter.com/dog_rates/status/66603341...
2354 [https://twitter.com/dog_rates/status/66602928...
2355 [https://twitter.com/dog_rates/status/66602088...
Name: expanded_urls, Length: 2356, dtype: object
# How many rows carry more than two distinct urls? (null rows are left out)
(s_urls.dropna().map(len) > 2).sum()
0
# First url of every list; second url only where the list holds exactly two.
first_url = s_urls.map(lambda urls: urls[0], na_action='ignore')
second_url = s_urls.map(lambda urls: urls[1] if len(urls) == 2 else np.nan,
                        na_action='ignore')
df_archive0['url_1'] = first_url.astype('string')
df_archive0['url_2'] = second_url.astype('string')
# Examine rows with 2 urls
has_second_url = df_archive0['url_2'].notnull()
df_archive0.loc[has_second_url, ['expanded_urls', 'url_1', 'url_2']]
| expanded_urls | url_1 | url_2 | |
|---|---|---|---|
| 6 | https://gofundme.com/ydvmve-surgery-for-jax,ht... | https://twitter.com/dog_rates/status/890971913... | https://gofundme.com/ydvmve-surgery-for-jax |
| 27 | https://www.gofundme.com/mingusneedsus,https:/... | https://www.gofundme.com/mingusneedsus | https://twitter.com/dog_rates/status/886736880... |
| 73 | https://www.gofundme.com/3yd6y1c,https://twitt... | https://www.gofundme.com/3yd6y1c | https://twitter.com/dog_rates/status/878281511... |
| 75 | https://www.gofundme.com/3yd6y1c,https://twitt... | https://www.gofundme.com/3yd6y1c | https://twitter.com/dog_rates/status/878281511... |
| 97 | https://www.gofundme.com/help-my-baby-sierra-g... | https://www.gofundme.com/help-my-baby-sierra-g... | https://twitter.com/dog_rates/status/873213775... |
| 98 | https://www.gofundme.com/help-my-baby-sierra-g... | https://www.gofundme.com/help-my-baby-sierra-g... | https://twitter.com/dog_rates/status/873213775... |
| 114 | https://www.gofundme.com/help-fix-codys-torn-a... | https://twitter.com/dog_rates/status/870656317... | https://www.gofundme.com/help-fix-codys-torn-acl |
| 124 | https://www.gofundme.com/3ti3nps,https://twitt... | https://www.gofundme.com/3ti3nps | https://twitter.com/dog_rates/status/868552278... |
| 126 | https://www.gofundme.com/3ti3nps,https://twitt... | https://www.gofundme.com/3ti3nps | https://twitter.com/dog_rates/status/868552278... |
| 146 | https://www.gofundme.com/helpquinny,https://tw... | https://twitter.com/dog_rates/status/863062471... | https://www.gofundme.com/helpquinny |
| 150 | https://www.gofundme.com/helpquinny,https://tw... | https://twitter.com/dog_rates/status/863062471... | https://www.gofundme.com/helpquinny |
| 159 | https://www.gofundme.com/help-lorenzo-beat-can... | https://www.gofundme.com/help-lorenzo-beat-cancer | https://twitter.com/dog_rates/status/860563773... |
| 161 | https://www.gofundme.com/help-lorenzo-beat-can... | https://www.gofundme.com/help-lorenzo-beat-cancer | https://twitter.com/dog_rates/status/860563773... |
| 177 | https://www.gofundme.com/meeko-needs-heart-sur... | https://www.gofundme.com/meeko-needs-heart-sur... | https://twitter.com/dog_rates/status/857393404... |
| 204 | http://www.gofundme.com/bluethewhitehusky,http... | https://twitter.com/dog_rates/status/831650051... | http://www.gofundme.com/bluethewhitehusky |
| 205 | https://www.gofundme.com/bennys-medical-bills,... | https://twitter.com/dog_rates/status/852912242... | https://www.gofundme.com/bennys-medical-bills |
| 233 | https://www.gofundme.com/help-save-rontu,https... | https://twitter.com/dog_rates/status/847842811... | https://www.gofundme.com/help-save-rontu |
| 237 | https://www.petfinder.com/petdetail/37334596,h... | https://twitter.com/dog_rates/status/847157206... | https://www.petfinder.com/petdetail/37334596 |
| 248 | https://www.gofundme.com/help-save-a-pup,https... | https://twitter.com/dog_rates/status/845397057... | https://www.gofundme.com/help-save-a-pup |
| 262 | https://www.gofundme.com/get-indie-home/,https... | https://twitter.com/dog_rates/status/842765311... | https://www.gofundme.com/get-indie-home/ |
| 272 | https://www.gofundme.com/3hgsuu0,https://twitt... | https://twitter.com/dog_rates/status/840632337... | https://www.gofundme.com/3hgsuu0 |
| 276 | https://www.gofundme.com/3hgsuu0,https://twitt... | https://twitter.com/dog_rates/status/840632337... | https://www.gofundme.com/3hgsuu0 |
| 309 | https://www.gofundme.com/lolas-life-saving-sur... | https://twitter.com/dog_rates/status/835264098... | https://www.gofundme.com/lolas-life-saving-sur... |
| 312 | https://www.gofundme.com/lolas-life-saving-sur... | https://twitter.com/dog_rates/status/835264098... | https://www.gofundme.com/lolas-life-saving-sur... |
| 344 | https://www.petfinder.com/petdetail/34918210,h... | https://www.petfinder.com/petdetail/34918210 | https://twitter.com/dog_rates/status/832032802... |
| 349 | http://www.gofundme.com/bluethewhitehusky,http... | https://twitter.com/dog_rates/status/831650051... | http://www.gofundme.com/bluethewhitehusky |
| 358 | https://www.gofundme.com/sick-baby-samson,http... | https://twitter.com/dog_rates/status/830097400... | https://www.gofundme.com/sick-baby-samson |
| 391 | http://us.blastingnews.com/news/2017/01/kentuc... | http://us.blastingnews.com/news/2017/01/kentuc... | https://twitter.com/dog_rates/status/826204788... |
| 397 | https://www.gofundme.com/my-puppys-double-cata... | https://twitter.com/dog_rates/status/825026590... | https://www.gofundme.com/my-puppys-double-cata... |
| 398 | https://www.gofundme.com/my-puppys-double-cata... | https://twitter.com/dog_rates/status/825026590... | https://www.gofundme.com/my-puppys-double-cata... |
| 434 | https://www.loveyourmelon.com/pages/ourstory,h... | https://www.loveyourmelon.com/pages/ourstory | https://twitter.com/dog_rates/status/820314633... |
| 436 | https://www.loveyourmelon.com/pages/ourstory,h... | https://www.loveyourmelon.com/pages/ourstory | https://twitter.com/dog_rates/status/820314633... |
| 438 | https://www.gofundme.com/servicedogoliver,http... | https://www.gofundme.com/servicedogoliver | https://twitter.com/dog_rates/status/819952236... |
| 439 | https://www.gofundme.com/servicedogoliver,http... | https://www.gofundme.com/servicedogoliver | https://twitter.com/dog_rates/status/819952236... |
| 464 | https://www.gofundme.com/help-strudel-walk-aga... | https://www.gofundme.com/help-strudel-walk-aga... | https://twitter.com/dog_rates/status/817415592... |
| 475 | https://www.gofundme.com/surgeryforjacktheminp... | https://twitter.com/dog_rates/status/815990720... | https://www.gofundme.com/surgeryforjacktheminpin |
| 477 | https://www.gofundme.com/surgeryforjacktheminp... | https://twitter.com/dog_rates/status/815990720... | https://www.gofundme.com/surgeryforjacktheminpin |
| 508 | https://m.facebook.com/story.php?story_fbid=18... | https://m.facebook.com/story.php?story_fbid=18... | https://twitter.com/dog_rates/status/812503143... |
| 516 | https://www.gofundme.com/sams-smile,https://tw... | https://www.gofundme.com/sams-smile | https://twitter.com/dog_rates/status/810984652... |
| 742 | https://www.patreon.com/WeRateDogs,https://twi... | https://www.patreon.com/WeRateDogs | https://twitter.com/Patreon/status/78046570929... |
| 1265 | http://goo.gl/ArWZfi,https://twitter.com/dog_r... | http://goo.gl/ArWZfi | https://twitter.com/dog_rates/status/709901256... |
# Inspect columns, non-null counts and dtypes after adding url_1 / url_2
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2356 entries, 0 to 2355 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2356 non-null string 1 in_reply_to_status_id 78 non-null string 2 in_reply_to_user_id 78 non-null string 3 timestamp 2356 non-null datetime64[ns, UTC] 4 text 2356 non-null string 5 retweeted_status_id 181 non-null string 6 retweeted_status_user_id 181 non-null string 7 retweeted_status_timestamp 181 non-null datetime64[ns, UTC] 8 expanded_urls 2297 non-null string 9 rating_numerator 2356 non-null Int64 10 rating_denominator 2356 non-null Int64 11 name 2356 non-null string 12 dog_designation 380 non-null category 13 favorite_count 2331 non-null Int64 14 retweet_count 2331 non-null Int64 15 user_id 2331 non-null string 16 user_name 2331 non-null string 17 user_url 2331 non-null string 18 source_url 2356 non-null string 19 source_name 2356 non-null category 20 url_1 2297 non-null string 21 url_2 41 non-null string dtypes: Int64(4), category(2), datetime64[ns, UTC](2), string(14) memory usage: 400.9 KB
# expanded_urls is now fully represented by url_1 / url_2
df_archive0.drop('expanded_urls', axis=1, inplace=True)
'expanded_urls' in df_archive0.columns
False
Drop the 181 rows with retweets. In addition, drop the columns dealing with retweets.
# A non-null retweeted_status_id marks the row as a retweet; remove those rows.
retweet_rows = df_archive0.index[df_archive0['retweeted_status_id'].notnull()]
df_archive0.drop(index=retweet_rows, inplace=True)
# With the retweets gone, the retweet bookkeeping columns are dropped too.
retweet_cols = ['retweeted_status_id',
                'retweeted_status_user_id',
                'retweeted_status_timestamp']
df_archive0.drop(columns=retweet_cols, inplace=True)
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2175 entries, 0 to 2355 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2175 non-null string 1 in_reply_to_status_id 78 non-null string 2 in_reply_to_user_id 78 non-null string 3 timestamp 2175 non-null datetime64[ns, UTC] 4 text 2175 non-null string 5 rating_numerator 2175 non-null Int64 6 rating_denominator 2175 non-null Int64 7 name 2175 non-null string 8 dog_designation 344 non-null category 9 favorite_count 2168 non-null Int64 10 retweet_count 2168 non-null Int64 11 user_id 2168 non-null string 12 user_name 2168 non-null string 13 user_url 2168 non-null string 14 source_url 2175 non-null string 15 source_name 2175 non-null category 16 url_1 2117 non-null string 17 url_2 28 non-null string dtypes: Int64(4), category(2), datetime64[ns, UTC](1), string(11) memory usage: 302.2 KB
expanded_urls has been converted to url_1 and url_2. Because url_1 is always filled before url_2 (url_2 is only set when a second, distinct url exists), url_1 is null exactly when expanded_urls was null, so url_1 alone can be used to find those rows.
# Rows without any url: report how many, then show what they look like.
url_missing = df_archive0['url_1'].isnull()
print('After dropping retweets, there are {} rows in which url_1 is null.'.format(url_missing.sum()))
df_archive0.loc[url_missing, ['in_reply_to_status_id', 'text', 'url_1']]
After dropping retweets, there are 58 rows in which url_1 is null.
| in_reply_to_status_id | text | url_1 | |
|---|---|---|---|
| 30 | 8.862663570751283e+17 | @NonWhiteHat @MayhewMayhem omg hello tanner yo... | <NA> |
| 55 | 8.816070373140521e+17 | @roushfenway These are good dogs but 17/10 is ... | <NA> |
| 64 | 8.795538273341727e+17 | @RealKentMurphy 14/10 confirmed | <NA> |
| 113 | 8.707262027424932e+17 | @ComplicitOwl @ShopWeRateDogs >10/10 is res... | <NA> |
| 148 | 8.634256455687741e+17 | @Jack_Septic_Eye I'd need a few more pics to p... | <NA> |
| 179 | 8.571566780553421e+17 | @Marc_IRL pixelated af 12/10 | <NA> |
| 186 | 8.562860041095537e+17 | @xianmcguire @Jenna_Marbles Kardashians wouldn... | <NA> |
| 188 | 8.558615844633518e+17 | @dhmontgomery We also gave snoop dogg a 420/10... | <NA> |
| 189 | 8.558585356070011e+17 | @s8n You tried very hard to portray this good ... | <NA> |
| 218 | 8.503288187788206e+17 | @markhoppus MARK THAT DOG HAS SEEN AND EXPERIE... | <NA> |
| 228 | 8.482121117298401e+17 | Jerry just apuppologized to me. He said there ... | <NA> |
| 234 | 8.476061755961385e+17 | .@breaannanicolee PUPDATE: Cannon has a heart ... | <NA> |
| 274 | 8.406983002988626e+17 | @0_kelvin_0 >10/10 is reserved for puppos s... | <NA> |
| 290 | 8.381454986911949e+17 | @markhoppus 182/10 | <NA> |
| 291 | 8.380855393624023e+17 | @bragg6of8 @Andy_Pace_ we are still looking fo... | <NA> |
| 313 | 8.35245984028504e+17 | @jonnysun @Lin_Manuel ok jomny I know you're e... | <NA> |
| 342 | 8.320875475599974e+17 | @docmisterio account started on 11/15/15 | <NA> |
| 346 | 8.319030442248356e+17 | @UNC can confirm 12/10 | <NA> |
| 375 | <NA> | Beebop and Doobert should start a band 12/10 w... | <NA> |
| 387 | 8.265983652700078e+17 | I was going to do 007/10, but the joke wasn't ... | <NA> |
| 409 | 8.233263893362442e+17 | @HistoryInPics 13/10 | <NA> |
| 427 | 8.21152592717697e+17 | @imgur for a polar bear tho I'd say 13/10 is a... | <NA> |
| 498 | 8.13127251579564e+17 | I've been informed by multiple sources that th... | <NA> |
| 513 | 8.116272330434806e+17 | PUPDATE: I've been informed that Augie was act... | <NA> |
| 570 | 8.018543306724475e+17 | .@NBCSports OMG THE TINY HAT I'M GOING TO HAVE... | <NA> |
| 576 | 8.008579544172626e+17 | @SkyWilliams doggo simply protecting you from ... | <NA> |
| 611 | 7.97123751162839e+17 | @JODYHiGHROLLER it may be an 11/10 but what do... | <NA> |
| 701 | 7.727430194476073e+17 | 13/10 for breakdancing puppo @shibbnbot | <NA> |
| 707 | <NA> | Today, 10/10, should be National Dog Rates Day | <NA> |
| 843 | 7.667118193648886e+17 | His name is Charley and he already has a new s... | <NA> |
| 857 | 7.638651745539645e+17 | @TheEllenShow I'm not sure if you know this bu... | <NA> |
| 967 | 7.501804988324045e+17 | 13/10 such a good doggo @spaghemily | <NA> |
| 1005 | 7.476486538174136e+17 | Other pupper asked not to have his identity sh... | <NA> |
| 1080 | 7.384119198962852e+17 | @mount_alex3 13/10 | <NA> |
| 1295 | 7.079800658926674e+17 | @serial @MrRoles OH MY GOD I listened to all o... | <NA> |
| 1345 | 7.044857446399099e+17 | 13/10 hero af @ABC | <NA> |
| 1445 | <NA> | Oh my god 10/10 for every little hot dog pupper | <NA> |
| 1446 | 6.964887109012603e+17 | After reading the comments I may have overesti... | <NA> |
| 1474 | 6.936422321512858e+17 | BREAKING PUPDATE: I've just been notified that... | <NA> |
| 1479 | 6.935722159383675e+17 | Personally I'd give him an 11/10. Not sure why... | <NA> |
| 1497 | 6.924173130233324e+17 | PUPDATE: just noticed this dog has some extra ... | <NA> |
| 1523 | 6.903412535490028e+17 | 12/10 @LightningHoltt | <NA> |
| 1598 | 6.860340248008622e+17 | Yes I do realize a rating of 4/20 would've bee... | <NA> |
| 1605 | 6.855479360386662e+17 | Jack deserves another round of applause. If yo... | <NA> |
| 1618 | 6.849597985851105e+17 | For those who claim this is a goat, u are wron... | <NA> |
| 1663 | 6.827884415375606e+17 | I'm aware that I could've said 20/16, but here... | <NA> |
| 1689 | 6.813394486558024e+17 | I've been told there's a slight possibility he... | <NA> |
| 1774 | 6.780211157180293e+17 | After getting lost in Reese's eyes for several... | <NA> |
| 1819 | 6.765883460978524e+17 | After some outrage from the crowd. Bubbles is ... | <NA> |
| 1844 | 6.758456573542154e+17 | This dog is being demoted to a 9/10 for not we... | <NA> |
| 1895 | 6.747399531344036e+17 | Some clarification is required. The dog is sin... | <NA> |
| 1905 | 6.744688808997888e+17 | The 13/10 also takes into account this impecca... | <NA> |
| 1914 | 6.658146967007232e+17 | 13/10 @ABC7 | <NA> |
| 1940 | 6.737158618537206e+17 | The millennials have spoken and we've decided ... | <NA> |
| 2038 | 6.715448741650022e+17 | After 22 minutes of careful deliberation this ... | <NA> |
| 2149 | 6.693543826270495e+17 | After countless hours of research and hundreds... | <NA> |
| 2189 | 6.689207171325829e+17 | 12/10 good shit Bubka @wane15 | <NA> |
| 2298 | 6.670655355705508e+17 | After much debate this dog is being upgraded t... | <NA> |
# Keep only the tweets that have a url. The explicit .copy() makes the result
# an independent frame: a boolean-mask selection is flagged by pandas as a
# possible view of its parent, so the in-place edits that follow
# (drop_duplicates(inplace=True), .loc assignments) would otherwise raise
# SettingWithCopyWarning.
df_archive0 = df_archive0[df_archive0.url_1.notnull()].copy()
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2117 entries, 0 to 2355 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2117 non-null string 1 in_reply_to_status_id 23 non-null string 2 in_reply_to_user_id 23 non-null string 3 timestamp 2117 non-null datetime64[ns, UTC] 4 text 2117 non-null string 5 rating_numerator 2117 non-null Int64 6 rating_denominator 2117 non-null Int64 7 name 2117 non-null string 8 dog_designation 338 non-null category 9 favorite_count 2110 non-null Int64 10 retweet_count 2110 non-null Int64 11 user_id 2110 non-null string 12 user_name 2110 non-null string 13 user_url 2110 non-null string 14 source_url 2117 non-null string 15 source_name 2117 non-null category 16 url_1 2117 non-null string 17 url_2 28 non-null string dtypes: Int64(4), category(2), datetime64[ns, UTC](1), string(11) memory usage: 294.1 KB
# Print the tweet text of every row whose url_1 also appears on another row
shares_url = df_archive0.duplicated('url_1', keep=False)
for tweet_text in df_archive0.loc[shares_url, 'text']:
    print(tweet_text)
Vine will be deeply missed. This was by far my favorite one. 14/10 https://t.co/roqIxCvEB3 Never forget this vine. You will not stop watching for at least 15 minutes. This is the second coveted.. 13/10 https://t.co/roqIxCvEB3
# Keep the first tweet for each url_1, then confirm that no duplicates remain
df_archive0.drop_duplicates(subset='url_1', keep='first', inplace=True)
len(df_archive0[df_archive0.duplicated('url_1', keep=False)])
0
# url_1 should have no missing values left
df_archive0['url_1'].isna().sum()
0
# Current column order, as a plain list
cols = list(df_archive0.columns)
cols
['tweet_id', 'in_reply_to_status_id', 'in_reply_to_user_id', 'timestamp', 'text', 'rating_numerator', 'rating_denominator', 'name', 'dog_designation', 'favorite_count', 'retweet_count', 'user_id', 'user_name', 'user_url', 'source_url', 'source_name', 'url_1', 'url_2']
# Desired column order: tweet content, dog attributes and rating,
# engagement counts, account info, reply info, and finally the source.
cols = (
    ['tweet_id', 'timestamp', 'text', 'url_1', 'url_2']
    + ['name', 'dog_designation', 'rating_numerator', 'rating_denominator']
    + ['favorite_count', 'retweet_count']
    + ['user_id', 'user_name', 'user_url']
    + ['in_reply_to_status_id', 'in_reply_to_user_id']
    + ['source_url', 'source_name']
)
# Reorder the columns. The .copy() detaches the result from the frame it was
# selected from, so the later in-place edits (.loc assignments, adding the
# temporary name_/numer/denom columns) do not raise SettingWithCopyWarning.
df_archive0 = df_archive0[cols].copy()
df_archive0.head(2)
| tweet_id | timestamp | text | url_1 | url_2 | name | dog_designation | rating_numerator | rating_denominator | favorite_count | retweet_count | user_id | user_name | user_url | in_reply_to_status_id | in_reply_to_user_id | source_url | source_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 892420643555336193 | 2017-08-01 16:23:56+00:00 | This is Phineas. He's a mystical boy. Only eve... | https://twitter.com/dog_rates/status/892420643... | <NA> | Phineas | NaN | 13 | 10 | 35001 | 7349 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk | <NA> | <NA> | http://twitter.com/download/iphone | Twitter for iPhone |
| 1 | 892177421306343426 | 2017-08-01 00:17:27+00:00 | This is Tilly. She's just checking pup on you.... | https://twitter.com/dog_rates/status/892177421... | <NA> | Tilly | NaN | 13 | 10 | 30305 | 5480 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk | <NA> | <NA> | http://twitter.com/download/iphone | Twitter for iPhone |
First, check whether the duplicates (778, 822) have already been eliminated. Second, if they have not, eliminate them.
# The retweets have already been eliminated.
# Both tests must go through .index: `label in DataFrame` looks the label up
# among the column names, so `822 in df_archive0` was False no matter what
# rows the frame held.
778 in df_archive0.index, 822 in df_archive0.index
(False, False)
# The originals are still present
tuple(label in df_archive0.index for label in (1113, 1063))
(True, True)
Change 'dog_designation' to 'doggo' if it is not already.
df_archive0.loc[200, 'dog_designation'] = 'doggo'
# Show the tweet's url next to the designation that was just set
row_200 = df_archive0.loc[200, ['url_1', 'dog_designation']]
for value in row_200:
    print(value)
https://twitter.com/dog_rates/status/854010172552949760/photo/1 doggo
df_archive0.drop(index=[531, 565, 733, 889, 1063, 1113], inplace=True)
# Look at the remaining 'pupper-doggo' rows to see whether each one is about two different dogs.
is_pupper_doggo = df_archive0['dog_designation'] == 'pupper-doggo'
df_archive0.loc[is_pupper_doggo, ['text', 'name', 'dog_designation']]
| text | name | dog_designation | |
|---|---|---|---|
| 460 | This is Dido. She's playing the lead role in "... | Dido | pupper-doggo |
| 575 | This is Bones. He's being haunted by another d... | Bones | pupper-doggo |
| 705 | This is Pinot. He's a sophisticated doggo. You... | Pinot | pupper-doggo |
| 956 | Please stop sending it pictures that don't eve... | None | pupper-doggo |
Drop the row for index number 705.
# Read the full tweet text for index label 705 before deciding what to do with it
print(df_archive0.at[705, 'text'])
This is Pinot. He's a sophisticated doggo. You can tell by the hat. Also pointier than your average pupper. Still 10/10 would pet cautiously https://t.co/f2wmLZTPHd
df_archive0 = df_archive0.drop(labels=705, axis=0)
# Check that index label 705 is gone
705 in df_archive0.index
False
name column: 'None', 'a', 'the', 'an',...
Extract names from the text column and compare the result with what was provided originally, replacing the original if the result improves on it. Where no name is extracted, fill in with a null value instead of the word 'None'.
# Original development:
#pattern = r'([Tt][Hh][Ii][Ss][ \.]+[Ii][Ss][ \.]+|Meet |[Hh]ello to |[Nn][Aa][Mm][Ee][ \.]+[Ii][Ss][ \.]+|[Hh]ere is |Here we have |changed his name to |RIP |featuring |named |pup is |pictures of )([A-Z]\'?\w+)'
# Easier to read the parts:
# Each part is a raw string so that regex escapes such as \. \d \w reach the
# regex engine untouched. In ordinary string literals they are invalid escape
# sequences, which Python reports with a warning. The assembled pattern is
# identical to the one built before.
# Parts a-g form the named group `indicator_`: the phrase that introduces a name.
a = r'(?P<indicator_>[Tt][Hh][Ii][Ss][ \.]+[Ii][Ss][ \.]+'
b = r'|Meet '
c = r'|[Hh]ello to '
d = r'|[Nn][Aa][Mm][Ee][ \.]+[Ii][Ss][ \.]+'
e = r'|[Hh]ere is '
f = r'|Here we have |changed his name to |RIP |featuring '
g = r'|named |pup is |pictures of |\d+/\d+ for (?:both \w+ and )?)'
# Part h is the named group `name_`: a capitalised word, optionally with an
# apostrophe after the first letter (e.g. O'Malley).
h = r"(?P<name_>[A-Z]'?\w+)"
pattern = a + b + c + d + e + f + g + h
# One row per tweet, with columns `indicator_` and `name_`; rows with no match are null
df_names = df_archive0['text'].str.extract(pattern, expand=True)
n_names_found = df_names['name_'].notnull().sum()
print('The number of names retrieved for dogs from the new text extraction is {}.'.format(n_names_found))
df_names.sample(10)
The number of names retrieved for dogs from the new text extraction is 1437.
| indicator_ | name_ | |
|---|---|---|
| 2237 | pup is | Oliver |
| 785 | This is | Tucker |
| 1753 | <NA> | <NA> |
| 1755 | This is | Tug |
| 1897 | Meet | Rufio |
| 1645 | This is | Jiminy |
| 2247 | <NA> | <NA> |
| 1266 | <NA> | <NA> |
| 1296 | Meet | Rufus |
| 158 | This is | Burt |
Note: The process used to develop the Regex pattern used to find names involved looking at lines of text from df_archive0, then trying out regex patterns at the regex tester website https://pythex.org/. As a regex pattern was developed it was used to isolate lines of text in df_archive0 that did NOT yield a name in order to visually examine those lines to look for patterns missed so that they could be added to the regex pattern and tested. The code below represents some of how that process occurred, where pattern kept being updated with new regex patterns.
#pattern = r'([Tt][Hh][Ii][Ss][ \.]+[Ii][Ss][ \.]+|Meet |[Hh]ello to |[Nn][Aa][Mm][Ee][ \.]+[Ii][Ss][ \.]+|[Hh]ere is |Here we have |changed his name to |RIP |featuring |named |pup is |pictures of )([A-Z]\'?\w+)'
#df_names = df_archive0.text.str.extract(pattern, expand=True)
#print(df_names[1].isnull().sum())
#print()
#for text in df_archive0[df_names[1].isnull()].text:
# print(text)
# Values in the original `name` column that are not real names
problem_names = [
    'None', 'a', 'an', 'the',
    'very', 'one', 'O', 'my',
]
# How many of the newly extracted names are one of these?
df_names['name_'].isin(problem_names).sum()
0
None of the new names are in the list of problem names from the old extraction.
# Non-null entries in the original `name` column (this count includes the problem values)
n_original_names = df_archive0['name'].notnull().sum()
print('The number of names retrieved for dogs from the original text extraction is {}.'.format(n_original_names))
The number of names retrieved for dogs from the original text extraction is 2109.
# Flag the rows whose original name is one of the problem values and tally them
mask = df_archive0['name'].isin(problem_names)
print(mask.sum())
df_archive0[mask]['name'].value_counts()
696
None 618 a 55 the 7 an 6 very 4 one 4 O 1 my 1 Name: name, dtype: Int64
The original name extraction resulted in 2109 names but 696 of them were clearly problematic, leaving 2109 - 696 = 1413 non-problematic names. The new extraction results in 1437 (non-problematic) names.
# Put the new names next to the old ones and list every row where they differ
# even though the old name was not one of the problem values.
df_archive0['name_'] = df_names['name_']
names_differ = df_archive0['name'] != df_archive0['name_']
df_archive0[names_differ & ~mask][['name', 'name_']]
| name | name_ |
|---|
The new name column improves on the old one. It improves on the problematic captures ('None', 'a', 'an', 'the', 'very', 'one', 'O', 'my') of the old one while capturing everything else that the old one did. So, the old one can be replaced with the new.
# Overwrite `name` with the new extraction; pop() also removes the temporary name_ column
df_archive0['name'] = df_archive0.pop('name_')
df_archive0.columns
Index(['tweet_id', 'timestamp', 'text', 'url_1', 'url_2', 'name',
'dog_designation', 'rating_numerator', 'rating_denominator',
'favorite_count', 'retweet_count', 'user_id', 'user_name', 'user_url',
'in_reply_to_status_id', 'in_reply_to_user_id', 'source_url',
'source_name'],
dtype='object')
Produce a new extraction from the text column of numerator and denominator values. In some cases, the numerator was provided in the text as a decimal. So, the data type of the numerator column should be "float". Compare the new extraction with the old one and replace the old one if the new is better.
# Rating pattern: the numerator may contain a decimal point, and the
# denominator must end in 0, which rules out 24/7, 7/11 and 1/2.
pattern = r'(?P<numer>\d+\.?\d*)/(?P<denom>\d+0)'
df_ratings = df_archive0['text'].str.extract(pattern, expand=True)
df_ratings.head()
| numer | denom | |
|---|---|---|
| 0 | 13 | 10 |
| 1 | 13 | 10 |
| 2 | 12 | 10 |
| 3 | 13 | 10 |
| 4 | 12 | 10 |
# Temporary columns for the comparison: nullable float numerator, nullable int denominator
df_archive0['numer'] = df_ratings['numer'].astype('Float64')
df_archive0['denom'] = df_ratings['denom'].astype('Int64')
Comparing the new denominator extraction with the old.
# New extraction: number of denominators found, and their distribution
print(df_archive0['denom'].notnull().sum())
df_archive0['denom'].value_counts()
2108
10 2094 50 3 80 2 20 1 40 1 70 1 90 1 110 1 120 1 130 1 150 1 170 1 Name: denom, dtype: Int64
# Original extraction: number of denominators present, and their distribution
print(df_archive0['rating_denominator'].notnull().sum())
df_archive0['rating_denominator'].value_counts()
2109
10 2091 50 3 80 2 11 2 2 1 20 1 40 1 70 1 90 1 110 1 120 1 130 1 150 1 170 1 7 1 Name: rating_denominator, dtype: Int64
There is one place where the new denominator extraction is null when the old denominator extraction is not. When we look at it we see that it should be null. So, the new extraction gets it right.
# Print the tweet text wherever the new extraction found no denominator
no_denominator = df_archive0['denom'].isnull()
for tweet_text in df_archive0[no_denominator]['text']:
    print(tweet_text)
Meet Sam. She smiles 24/7 & secretly aspires to be a reindeer. Keep Sam smiling by clicking and sharing this link: https://t.co/98tB8y7y7t https://t.co/LouL5vdvxx
When we look at cases where the new denominator extraction differs from the old (see below), we see that the new gets it right. So, the new denominator extraction is an improvement.
# Rows where the old and the new denominator disagree
mask = df_archive0['rating_denominator'] != df_archive0['denom']
print(mask.sum())
df_archive0[mask][['text', 'rating_numerator', 'rating_denominator', 'numer', 'denom']]
3
| text | rating_numerator | rating_denominator | numer | denom | |
|---|---|---|---|---|---|
| 1068 | After so many requests, this is Bretagne. She ... | 9 | 11 | 14.0 | 10 |
| 1662 | This is Darrel. He just robbed a 7/11 and is i... | 7 | 11 | 10.0 | 10 |
| 2335 | This is an Albanian 3 1/2 legged Episcopalian... | 1 | 2 | 9.0 | 10 |
# Print index label and full tweet text for every row selected by `mask`.
# Series.items() replaces iteritems(), which was deprecated in pandas 1.5 and
# removed in pandas 2.0; items() yields the same (label, value) pairs.
for index, text in df_archive0[mask].text.items():
    print(index, text)
1068 After so many requests, this is Bretagne. She was the last surviving 9/11 search dog, and our second ever 14/10. RIP https://t.co/XAVDNDaVgQ 1662 This is Darrel. He just robbed a 7/11 and is in a high speed police chase. Was just spotted by the helicopter 10/10 https://t.co/7EsP8LmSp5 2335 This is an Albanian 3 1/2 legged Episcopalian. Loves well-polished hardwood flooring. Penis on the collar. 9/10 https://t.co/d9NcXFKwLv
Where they differ, the new denominator extraction is better than the old.
Let's compare the new numerator extraction with the old. We already see that in the above three cases the new is better than the old.
# Rows where the old and the new numerator disagree
mask = df_archive0['rating_numerator'] != df_archive0['numer']
print(mask.sum())
df_archive0[mask][['text', 'rating_numerator', 'rating_denominator', 'numer', 'denom']]
7
| text | rating_numerator | rating_denominator | numer | denom | |
|---|---|---|---|---|---|
| 45 | This is Bella. She hopes her smile made you sm... | 5 | 10 | 13.5 | 10 |
| 695 | This is Logan, the Chow who lived. He solemnly... | 75 | 10 | 9.75 | 10 |
| 763 | This is Sophie. She's a Jubilant Bush Pupper. ... | 27 | 10 | 11.27 | 10 |
| 1068 | After so many requests, this is Bretagne. She ... | 9 | 11 | 14.0 | 10 |
| 1662 | This is Darrel. He just robbed a 7/11 and is i... | 7 | 11 | 10.0 | 10 |
| 1712 | Here we have uncovered an entire battalion of ... | 26 | 10 | 11.26 | 10 |
| 2335 | This is an Albanian 3 1/2 legged Episcopalian... | 1 | 2 | 9.0 | 10 |
In comparing the new numerator with the old we first see the three cases in which the old denominator was not 10 but should have been. Let us examine the text in the other cases.
# Numerator mismatches that are not explained by a wrong old denominator:
# the numerators differ although the old denominator was already 10.
mask = (df_archive0.rating_numerator != df_archive0.numer) & (df_archive0.rating_denominator == 10)
# Series.items() replaces iteritems(), which was deprecated in pandas 1.5 and
# removed in pandas 2.0; items() yields the same (label, value) pairs.
for index, text in df_archive0[mask].text.items():
    print(index, text)
45 This is Bella. She hopes her smile made you smile. If not, she is also offering you her favorite monkey. 13.5/10 https://t.co/qjrljjt948 695 This is Logan, the Chow who lived. He solemnly swears he's up to lots of good. H*ckin magical af 9.75/10 https://t.co/yBO5wuqaPS 763 This is Sophie. She's a Jubilant Bush Pupper. Super h*ckin rare. Appears at random just to smile at the locals. 11.27/10 would smile back https://t.co/QFaUiIHxHq 1712 Here we have uncovered an entire battalion of holiday puppers. Average of 11.26/10 https://t.co/eNm2S6p9BD
The new extraction, including decimal values, is an improvement.
# Overwrite the old rating columns with the new extractions; pop() also
# removes the temporary numer / denom columns.
df_archive0['rating_numerator'] = df_archive0.pop('numer')
df_archive0['rating_denominator'] = df_archive0.pop('denom')
df_archive0.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2109 entries, 0 to 2355 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 tweet_id 2109 non-null string 1 timestamp 2109 non-null datetime64[ns, UTC] 2 text 2109 non-null string 3 url_1 2109 non-null string 4 url_2 28 non-null string 5 name 1437 non-null string 6 dog_designation 331 non-null category 7 rating_numerator 2108 non-null Float64 8 rating_denominator 2108 non-null Int64 9 favorite_count 2102 non-null Int64 10 retweet_count 2102 non-null Int64 11 user_id 2102 non-null string 12 user_name 2102 non-null string 13 user_url 2102 non-null string 14 in_reply_to_status_id 22 non-null string 15 in_reply_to_user_id 22 non-null string 16 source_url 2109 non-null string 17 source_name 2109 non-null category dtypes: Float64(1), Int64(3), category(2), datetime64[ns, UTC](1), string(11) memory usage: 357.5 KB
# Print every tweet whose denominator is something other than 10.
mask = (df_archive0.rating_denominator != 10)
# Series.items() replaces iteritems(), which was deprecated in pandas 1.5 and
# removed in pandas 2.0; items() yields the same (label, value) pairs.
for index, text in df_archive0[mask].text.items():
    print(index, text)
433 The floofs have been released I repeat the floofs have been released. 84/70 https://t.co/NIYC820tmd 902 Why does this never happen at my front door... 165/150 https://t.co/HmwrdfEfUE 1120 Say hello to this unbelievably well behaved squad of doggos. 204/170 would try to pet all at once https://t.co/yGQI3He3xv 1165 Happy 4/20 from the squad! 13/10 for all https://t.co/eV1diwds8a 1202 This is Bluebert. He just saw that both #FinalFur match ups are split 50/50. Amazed af. 11/10 https://t.co/Kky1DPG4iq 1228 Happy Saturday here's 9 puppers on a bench. 99/90 good work everybody https://t.co/mpvaVxKmc1 1254 Here's a brigade of puppers. All look very prepared for whatever happens next. 80/80 https://t.co/0eb7R1Om12 1274 From left to right: Cletus, Jerome, Alejandro, Burp, & Titson None know where camera is. 45/50 would hug all at once https://t.co/sedre1ivTK 1351 Here is a whole flock of puppers. 60/50 I'll take the lot https://t.co/9dpcw6MdWa 1433 Happy Wednesday here's a bucket of pups. 44/40 would pet all at once https://t.co/HppvrYuamZ 1634 Two sneaky puppers were not initially seen, moving the rating to 143/130. Please forgive us. Thank you https://t.co/kRK51Y5ac3 1635 Someone help the girl is being mugged. Several are distracting her while two steal her shoes. Clever puppers 121/110 https://t.co/1zfnTJLt55 1779 IT'S PUPPERGEDDON. Total of 144/120 ...I think https://t.co/ZanVtAtvIq 1843 Here we have an entire platoon of puppers. Total score: 88/80 would pet all at once https://t.co/y93p6FLvVw
Observation: Where the denominator is not 10, it is because there is more than one dog involved. In the above list, 1165 and 1202 should not be included because there were two ratios in the text and the extraction picked up the first ratio when the second was really the one wanted. Excluding those two cases, in all the rest of the cases a denominator greater than 10 indicates the presence of more than one dog. A denominator greater than 10 is a sufficient indicator of more than one dog, but not a necessary one. This is evidenced by case 1165 which includes more than one dog and awards "13/10 for all".
# Inspect tweets with a low rating (numerator of 7 or less): print how many
# there are, then each row label together with its text.
mask = df_archive0.rating_numerator <= 7
print(mask.sum())
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for index, text in df_archive0[mask].text.items():
    print(index, text)
166 229 This is Jerry. He's doing a distinguished tongue slip. Slightly patronizing tbh. You think you're better than us, Jerry? 6/10 hold me back https://t.co/DkOBbwulw1 315 When you're so blinded by your systematic plagiarism that you forget what day it is. 0/10 https://t.co/YbEJPkg4Ag 730 Who keeps sending in pictures without dogs in them? This needs to stop. 5/10 for the mediocre road https://t.co/ELqelxWMrC 765 This is Wesley. He's clearly trespassing. Seems rather h*ckin violent too. Weaponized forehead. 3/10 wouldn't let in https://t.co/pL7wbMRW7M 814 Another pic without a dog in it? What am I supposed to do? Rate the carpet? Fine I will. 7/10 looks adequately comfy https://t.co/OJZQ6I4gGd 883 This is Fido. He can tell the weather. Not good at fetch tho. Never comes when called. 4/10 would probably still pet https://t.co/4gOv2Q3iKP 896 Meet Toby. He has a drinking problem. Inflatable marijuana plant in the back is also not a good look. 7/10 cmon Toby https://t.co/Cim4DSj6Oi 912 Here's another picture without a dog in it. Idk why you guys keep sending these. 4/10 just because that's a neat rug https://t.co/mOmnL19Wsl 956 Please stop sending it pictures that don't even have a doggo or pupper in them. Churlish af. 5/10 neat couch tho https://t.co/u2c9c7qSg8 993 This is one of the most reckless puppers I've ever seen. How she got a license in the first place is beyond me. 6/10 https://t.co/z5bAdtn9kd 998 This sherk must've leapt out of the water and into the canoe, trapping the human. Won't even help paddle smh. 7/10 https://t.co/KubWEqOIgO 1004 Viewer discretion is advised. This is a terrible attack in progress. Not even in water (tragic af). 4/10 bad sherk https://t.co/L3U0j14N5R 1016 PUPDATE: can't see any. Even if I could, I couldn't reach them to pet. 0/10 much disappointment https://t.co/c7WXaB2nqX 1029 This is Percy. He fell asleep at the wheel. Irresponsible af. 7/10 absolute menace on the roadway https://t.co/QHbvtvaw8E 1045 This is Harold. 
He looks slippery af. Probably difficult to hug. Would still try tho. 7/10 great with kids I bet https://t.co/EVuqdEO66N 1078 This is Kyle. He's a heavy drinker and an avid pot user. Just wants to be pupular. 6/10 I can't support this Kyle https://t.co/rRULp7XFnO 1125 This is Charles. He's camera shy. Tail longer than average. Doesn't look overwhelmingly fluffy. 6/10 would still pet https://t.co/rXvcElhoog 1165 Happy 4/20 from the squad! 13/10 for all https://t.co/eV1diwds8a 1189 This is Alexanderson. He's got a weird ass birth mark. Dreadful at fetch. Won't eat kibble. 3/10 wtf @Target https://t.co/FmxOpf2Sgl 1219 This is Benedict. He's a feisty pup. Needs a brushing. Portable af. Looks very angry actually. 4/10 might not pet https://t.co/3oeFfHjv0Z 1239 This is Steven. He's inverted af. Also very helpful. Scans anything you want for free. Takes him a while tho. 7/10 https://t.co/tA0ZiQ7JcG 1241 This is Chester. He's clearly in charge of the other dogs. Weird ass paws. Not fit for fetch. 6/10 would still pet https://t.co/o2GvskrhHt 1249 What hooligan sent in pictures w/out a dog in them? Churlish af. 3/10 just bc that's a neat fluffy bean bag chair https://t.co/wcwoGOkZvz 1300 This is Jiminus. He's in a tub for some reason. What a jokester. Smh 7/10 churlish af https://t.co/84L4ED9Tpi 1303 This is Keurig. He's a rare dog. Laughs like an idiot tho. Head is basically a weapon. Poorly maintained goatee 4/10 https://t.co/xOrUyj7K30 1314 This is Elliot. He's blocking the roadway. Downright rude as hell. Doesn't care that you're already late. 3/10 https://t.co/FMUxir5pYu 1325 This is Dexter. He's a shy pup. Doesn't bark much. Dreadful fetcher. Has rare sun allergy. 7/10 still petable https://t.co/sA7P3JSqiv 1342 This is Chesterson. He's a Bolivian Scoop Dog. Incredibly portable. Can't bark for shit tho. 7/10 would still pet https://t.co/EatAd8JhyW 1363 This is Chip. He's an Upper West Nile Pantaloon. Extremely deadly. Will rip your throat out. 
6/10 might still pet https://t.co/LUFnwzznaV 1384 This is Daniel. He's a neat pup. Exotic af. Custom paws. Leaps unannounced. Would totally pet. 7/10 daaamn Daniel https://t.co/5XaR0kj8cr 1388 This is Murphy. He's a mini golden retriever. Missing two legs (tragic). Mouth sharp. Looks rather perturbed. 6/10 https://t.co/ALO02IAKCn 1392 This is Eazy-E. He's colorful af. Must be rare. Submerged in Sprite (rad). Doesn't perform well when not wet. 6/10 https://t.co/UtFI7eUCjE 1399 This is Dave. He's a tropical pup. Short lil legs (dachshund mix?) Excels underwater, but refuses to eat kibble 5/10 https://t.co/ZJnCxlIf62 1406 This is Charl. He's a bully. Chucks that dumbbell around like its nothing. Sharp neck. Exceptionally unfluffy. 3/10 https://t.co/VfLoDZecJ7 1415 This is Rusty. He has no respect for POULTRY products. Unbelievable af. 7/10 would still pet https://t.co/hEH19t1eFp 1448 This is Berb. He just found out that they have made 31 Kidz Bop CD's. Downright terrifying. 7/10 hang in there Berb https://t.co/CIFLjiTFwZ 1459 This may be the greatest video I've ever been sent. 4/10 for Charles the puppy, 13/10 overall. (Vid by @stevenxx_) https://t.co/uaJmNgXR2P 1460 Meet Brian (pronounced "Kirk"). He's not amused by ur churlish tomfoolery. Once u put him down you're done for. 6/10 https://t.co/vityMwPKKi 1461 Please only send in dogs. This t-rex is very scary. 5/10 ...might still pet (vid by @helizabethmicha) https://t.co/Vn6w5w8TO2 1473 What kind of person sends in a pic without a dog in it? So churlish. Neat rug tho 7/10 https://t.co/LSTAwTdTaw 1478 Meet Phil. He's big af. Currently destroying this nice family home. Completely uncalled for. 3/10 not a good pupper https://t.co/fShNNhBWYx 1498 Meet Herschel. He's slightly bigger than ur average pupper. Looks lonely. Could probably ride 7/10 would totally pet https://t.co/VGaIMktX10 1508 When bae says they can't go out but you see them with someone else that same night. 
5/10 & 10/10 for heartbroken pup https://t.co/aenk0KpoWM 1555 This is Hamrick. He's covered in corn flakes. Silly pupper. Looks congested. 7/10 considerably petable https://t.co/ROPZcAMQKI 1573 This is Marq. He stole this car. 7/10 wtf Marq? https://t.co/MHScqo5l8c 1579 "You got any games on your phone" 7/10 for invasive brown Dalmatian pupper https://t.co/yzGR9xjE9Q 1583 Army of water dogs here. None of them know where they're going. Have no real purpose. Aggressive barks. 5/10 for all https://t.co/A88x73TwMN 1601 This is Hammond. He's a peculiar pup. Loves long walks. Bark barely audible. Too many legs. 3/10 must be rare https://t.co/NOIiRWr5Jf 1608 This is Otis. He just passed a cop while going 61 in a 45. Very nervous pupper. 7/10 https://t.co/jJS8qQeuNO 1619 This is Jerry. He's a neat dog. No legs (tragic). Has more horns than a dog usually does. Bark is unique af. 5/10 https://t.co/85q7xlplsJ 1624 Here we have a basking dino pupper. Looks powerful. Occasionally shits eggs. Doesn't want the holidays to end. 5/10 https://t.co/DnNweb5eTO 1629 This is Bobby. He doesn't give a damn about personal space. Convinced he called shotgun first. 4/10 not the best dog https://t.co/b8XW69gSaU 1636 Gang of fearless hoofed puppers here. Straight savages. Elevated for extra terror. Front one has killed before 6/10s https://t.co/jkCb25OWfh 1645 This is Jiminy. He's not the brightest dog. Needs to lay off the kibble. 5/10 still petable https://t.co/omln4LOy1x 1670 This is Patrick. He's a bigass pupper. 7/10 https://t.co/J9DXBFoAQe 1680 Unique dog here. Wrinkly as hell. Weird segmented neck. Finger on fire. Doesn't seem to notice. 5/10 might still pet https://t.co/Hy9La4xNX3 1684 Can you spot Toby the guilty pupper? 7/10 would be higher but he made quite the mess shredding his stuffed pals https://t.co/3uCcDEJLXs 1692 This is Chuck. He's a neat dog. Very flexible. Trapped in a glass case of emotion. Devastatingly unfluffy 3/10 https://t.co/YqbU9xHV3p 1701 This is Alice. 
She's an idiot. 4/10 https://t.co/VQXdwJfkyS 1708 Say hello to Moofasa. He must be a powerful dog. Fenced in for your protection. Just got his ear pierced. 6/10 https://t.co/w6fRfQ3RXD 1727 Meet Penelope. She's a bacon frise. Total babe (lol get it like the movie). Doesn't bark tho. 5/10 very average dog https://t.co/SDcQYg0HSZ 1730 This is Bruce. He's a rare pup. Covered in Frosted Flakes. Nifty gold teeth. Overall good dog. 7/10 would pet firmly https://t.co/RtxxACzZ8A 1737 Guys this really needs to stop. We've been over this way too many times. This is a giraffe. We only rate dogs.. 7/10 https://t.co/yavgkHYPOC 1739 Say hello to William. He makes fun of others because he's terrified of his own deep-seated insecurities. 7/10 https://t.co/bwuV6FlRxr 1754 This is Linda. She fucking hates trees. 7/10 https://t.co/blaY85FIxR 1759 Meet Tango. He's a large dog. Doesn't care much for personal space. Owner isn't very accepting. Tongue slip. 6/10 https://t.co/p2T5kGebxe 1761 Exotic pup here. Tail long af. Throat looks swollen. Might breathe fire. Exceptionally unfluffy 2/10 would still pet https://t.co/a8SqCaSo2r 1764 This is Crystal. She's a shitty fireman. No sense of urgency. People could be dying Crystal. 2/10 just irresponsible https://t.co/rtMtjSl9pz 1787 Contortionist pup here. Inside pentagram. Clearly worships Satan. Known to slowly push fragile stuff off tables 6/10 https://t.co/EX9oR55VMe 1796 This is Juckson. He's totally on his way to a nascar race. 5/10 for Juckson https://t.co/IoLRvF0Kak 1803 This is Karl. Karl thinks he's slick. 6/10 sneaky pup https://t.co/Lo4ALwjVh4 1808 Exotic handheld dog here. Appears unathletic. Feet look deadly. Can be thrown a great distance. 5/10 might pet idk https://t.co/Avq4awulqk 1820 This is Bubbles. He kinda resembles a fish. Always makes eye contact with u no matter what. Sneaky tongue slip. 5/10 https://t.co/Nrhvc5tLFT 1836 Extremely rare pup here. Very religious. Always praying. Too many legs. Not overwhelmingly fluffy. 
Won't bark. 3/10 https://t.co/REyE5YKVBb 1839 This is Donny. He's summoning the demon monster Babadook. 6/10 Donny please no that won't be a good time for anyone https://t.co/kiW6Knb7Gp 1861 Rare shielded battle dog here. Very happy about abundance of lettuce. Painfully slow fetcher. Still petable. 5/10 https://t.co/C3tlKVq7eO 1869 What kind of person sends in a picture without a dog in it? 1/10 just because that's a nice table https://t.co/RDXCfk8hK0 1872 This is Coops. He's yelling at the carpet. Not very productive Coops. 7/10 https://t.co/Uz52oYnHzF 1874 This is Steven. He got locked outside. Damn it Steven. 5/10 nice grill tho https://t.co/zf7Sxxjfp3 1898 Meet Patrick. He's an exotic pup. Jumps great distances for a dog. Always gets injured when I toss him a ball. 3/10 https://t.co/Unz1uNrOzo 1901 Two gorgeous dogs here. Little waddling dog is a rebel. Refuses to look at camera. Must be a preteen. 5/10 & 8/10 https://t.co/YPfw7oahbD 1904 Rare submerged pup here. Holds breath for a long time. Frowning because that spoon ignores him. 5/10 would still pet https://t.co/EJzzNHE8bE 1920 This is Henry. He's a shit dog. Short pointy ears. Leaves trail of pee. Not fluffy. Doesn't come when called. 2/10 https://t.co/Pu9RhfHDEQ 1925 This is Earl. Earl is lost. Someone help Earl. He has no tags. Just trying to get home. 5/10 hang in there Earl https://t.co/1ZbfqAVDg6 1928 Herd of wild dogs here. Not sure what they're trying to do. No real goals in life. 3/10 find your purpose puppers https://t.co/t5ih0VrK02 1938 Guys I'm getting real tired of this. We only rate dogs. Please don't send in other things like this Bulbasaur. 3/10 https://t.co/t5rQHl6W8M 1941 This is a heavily opinionated dog. Loves walls. Nobody knows how the hair works. Always ready for a kiss. 4/10 https://t.co/dFiaKZ9cDl 1947 Large blue dog here. Cool shades. Flipping us off w both hands. Obviously a preteen. 3/10 for rude blue preteen pup https://t.co/mcPd5AFfhA 1956 This is Jeffri. 
He's a speckled ice pupper. Very lazy. Enjoys the occasional swim. Rather majestic really. 7/10 https://t.co/0iyItbtkr8 1959 Sun burnt dog here. Quite large. Wants to promote peace. Looks unemployed. Ears for days. 7/10 would pet profusely https://t.co/WlKiN3ll0w 1965 This is Gerald. He's a fluffy lil yellow pup. Always looks like his favorite team just lost on a hail mary. 7/10 https://t.co/GpSkpN8kXS 1972 Magical floating dog here. Very calm. Always hangs by the pond. Rather moist. Good listener. 6/10 personally I'd pet https://t.co/1euKoOvy49 1979 Extraordinary dog here. Looks large. Just a head. No body. Rather intrusive. 5/10 would still pet https://t.co/ufHWUFA9Pu 1981 This is Chet. He's having a hard time. Really struggling. 7/10 hang in there pupper https://t.co/eb4ta0xtnd 1986 Marvelous dog here. Rad ears. Not very soft. Large tumor on nose. Has a pet rock. Good w kids. 6/10 overall neat pup https://t.co/g5YkRqP0dg 1992 This is Norman. Doesn't bark much. Very docile pup. Up to date on current events. Overall nifty pupper. 6/10 https://t.co/ntxsR98f3U 1998 Meet Darby. He's a Fiscal Tutankhamen Waxbeard. Really likes steak. 7/10 https://t.co/rSndxTL0Ap 2012 This is Ridley. He doesn't know how to couch. 7/10 https://t.co/UHJE0UgMf7 2013 Exotic underwater dog here. Very shy. Wont return tennis balls I toss him. Never been petted. 5/10 I bet he's soft https://t.co/WH7Nzc5IBA 2022 Say hello to Gizmo. He's upset because he's not sure if he's really big or the shopping cart is really small. 7/10 https://t.co/XkMtCGhr4a 2026 This is Brad. He's a chubby lil pup. Doesn't really need the food he's trying to reach. 5/10 you've had enough Brad https://t.co/vPXKSaNsbE 2033 Very fit horned dog here. Looks powerful. Not phased by wind. Great beard. Big enough to ride? 6/10 would cuddle https://t.co/wwwYO9C9kl 2040 Interesting dog here. Very large. Purple. Manifests rainbows. Perfect teeth. No ears. 
Surprisingly knowledgable 6/10 https://t.co/QVaEMsB9tS 2044 Super rare dog here. Spiffy mohawk. Sharp mouth. Shits eggs. Cool chariot wheel in background. 6/10 v confident pup https://t.co/pcx8jm1J1K 2054 Striped dog here. Having fun playing on back. Sturdy paws. Looks like an organized Dalmatian. 7/10 would still pet https://t.co/U1mSS3Ykez 2063 This is Anthony. He just finished up his masters at Harvard. Unprofessional tattoos. Always looks perturbed. 5/10 https://t.co/iHLo9rGay1 2070 Two miniature golden retrievers here. Webbed paws. Don't walk very efficiently. Can't catch a tennis ball. 4/10s https://t.co/WzVLdSHJU7 2071 Meet Phred. He isn't steering, looking at the road, or wearing a seatbelt. Phred is a rolling tornado of danger 6/10 https://t.co/mZD7Bo7HfV 2076 Pink dogs here. Unreasonably long necks. Left guy has only 1 leg. Quite nimble. Don't bark tho 4/10s would still pet https://t.co/QY5uvMmmQk 2077 This is Jett. He is unimpressed by flower. 7/10 https://t.co/459qWNnV3F 2079 Scary dog here. Too many legs. Extra tail. Not soft, let alone fluffy. Won't bark. Moves sideways. Has weapon. 2/10 https://t.co/XOPXCSXiUT 2088 This is Antony. He's a Sheraton Tetrahedron. Skips awkwardly. Doesn't look when he crosses the road (reckless). 7/10 https://t.co/gTy4WMXu8l 2091 Flamboyant pup here. Probably poisonous. Won't eat kibble. Doesn't bark. Slow af. Petting doesn't look fun. 1/10 https://t.co/jxukeh2BeO 2092 This dude slaps your girl's ass what do you do? 5/10 https://t.co/6dioUL6gcP 2093 This is Traviss. He has no ears. Two rare dogs in background. I bet they all get along nicely. 7/10s I'd pet all https://t.co/Viu56hVhhP 2100 Meet Danny. He's too good to look at the road when he's driving. Absolute menace. 6/10 completely irresponsible https://t.co/I1lMUy1FqH 2107 Silly dog here. Wearing bunny ears. Nice long tail. Unique paws. Not crazy soft but will do. Extremely agile. 7/10 https://t.co/2BnCLtJMxD 2109 Vibrant dog here. Fabulous tail. Only 2 legs tho. 
Has wings but can barely fly (lame). Rather elusive. 5/10 okay pup https://t.co/cixC0M3P1e 2119 Sharp dog here. Introverted. Loves purple. Not fun to pet. Hurts to cuddle with. 6/10 still good dog tho https://t.co/Dfv2YaHPMn 2134 This is Randall. He's from Chernobyl. Built playground himself. Has been stuck up there quite a while. 5/10 good dog https://t.co/pzrvc7wKGd 2136 This is Tommy. He's a cool dog. Hard not to step on. Won't let go of seashell. Not fast by any means. 3/10 https://t.co/0gY6XTOpn3 2139 Awesome dog here. Not sure where it is tho. Spectacular camouflage. Enjoys leaves. Not very soft. 5/10 still petable https://t.co/rOTOteKx4q 2153 This is a brave dog. Excellent free climber. Trying to get closer to God. Not very loyal though. Doesn't bark. 5/10 https://t.co/ODnILTr4QM 2157 Special dog here. Pretty big. Neck kinda long for dog. Cool spots. Must be a Dalmatian variant. 6/10 would still pet https://t.co/f8GXeDbFzu 2159 This is Keith. He's had 13 DUIs. 7/10 that's too many Keith https://t.co/fa7olwrF9Y 2163 This is Billl. He's trying to be a ghost but he's not very good at it. 6/10 c'mon Billl https://t.co/ero0XfdGtY 2181 Two gorgeous pups here. Both have cute fake horns(adorable). Barn in the back looks on fire. 5/10 would pet rly well https://t.co/w5oYFXi0uh 2183 This is Bernie. He's taking his Halloween costume very seriously. Wants to be baked. 3/10 not a good idea Bernie smh https://t.co/1zBp1moFlX 2184 Honor to rate this dog. Lots of fur on him. Two massive tumors on back. Should get checked out. Very neat tho. 7/10 https://t.co/bMhs18elNF 2186 Unique dog here. Oddly shaped tail. Long pink front legs. I don't think dogs breath underwater sos. 4/10 bad owner https://t.co/0EJXxE9UxW 2198 This is a wild Toblerone from Papua New Guinea. Mouth always open. Addicted to hay. Acts blind. 7/10 handsome dog https://t.co/IGmVbz07tZ 2202 Fascinating dog here. Loves beach. Oddly long nose for dog. Massive ass paws. Hard to cuddle w. 
3/10 would still pet https://t.co/IiSdmhkC5N 2206 Meet Zeek. He is a grey Cumulonimbus. Zeek is hungry. Someone should feed Zeek asap. 5/10 absolutely terrifying https://t.co/fvVNScw8VH 2211 Here is a horned dog. Much grace. Can jump over moons (dam!). Paws not soft. Bad at barking. 7/10 can still pet tho https://t.co/2Su7gmsnZm 2222 Here is a mother dog caring for her pups. Snazzy red mohawk. Doesn't wag tail. Pups look confused. Overall 4/10 https://t.co/YOHe6lf09m 2235 This is a Trans Siberian Kellogg named Alfonso. Huge ass eyeballs. Actually Dobby from Harry Potter. 7/10 https://t.co/XpseHBlAAb 2237 This lil pup is Oliver. Hops around. Has wings but doesn't fly (lame). Annoying chirp. Won't catch tennis balls 2/10 https://t.co/DnhUw0aBM2 2238 This is Alfie. He's that one hypocritical gym teacher who made you run laps. Great posture. Cool bench. 6/10 https://t.co/GCJzm3YsfX 2239 This dog resembles a baked potato. Bed looks uncomfortable. No tail. Comes with butter tho. 3/10 petting still fun https://t.co/x89NSCEZCq 2242 Wow. Armored dog here. Ready for battle. Face looks dangerous. Not very loyal. Lil dog on back havin a blast. 5/10 https://t.co/SyMoWrp368 2246 This is Tedrick. He lives on the edge. Needs someone to hit the gas tho. Other than that he's a baller. 10&2/10 https://t.co/LvP1TTYSCN 2258 What a dog to start the day with. Very calm. Likes to chill by pond. Corkscrews sticking out of head. Obedient. 7/10 https://t.co/0nIxPTDWAZ 2261 Never seen dog like this. Breathes heavy. Tilts head in a pattern. No bark. Shitty at fetch. Not even cordless. 1/10 https://t.co/i9iSGNn3fx 2272 Two dogs in this one. Both are rare Jujitsu Pythagoreans. One slightly whiter than other. Long legs. 7/10 and 8/10 https://t.co/ITxxcc4v9y 2274 This is Reese and Twips. Reese protects Twips. Both think they're too good for seat belts. Simply reckless. 7/10s https://t.co/uLzRi1drVK 2276 Exotic dog here. Long neck. Weird paws. Obsessed with bread. Waddles. Flies sometimes (wow!). 
Very happy dog. 6/10 https://t.co/rqO4I3nf2N 2279 This is Biden. Biden just tripped... 7/10 https://t.co/3Fm9PwLju1 2288 These are strange dogs. All have toupees. Long neck for dogs. In a shed of sorts? Work in groups? 4/10 still petable https://t.co/PZxSarAfSN 2296 This is Clybe. He is an Anemone Valdez. One ear works. Can look in 2 different directions at once. Tongue slip. 7/10 https://t.co/Ks0jZtdIrr 2305 My goodness. Very rare dog here. Large. Tail dangerous. Kinda fat. Only eats leaves. Doesn't come when called 3/10 https://t.co/xYGdBrMS9h 2310 Unfamiliar with this breed. Ears pointy af. Won't let go of seashell. Won't eat kibble. Not very fast. Bad dog 2/10 https://t.co/EIn5kElY1S 2312 This is Josep. He is a Rye Manganese mix. Can drive w eyes closed. Very irresponsible. Menace on the roadways. 5/10 https://t.co/XNGeDwrtYH 2316 Cool dog. Enjoys couch. Low monotone bark. Very nice kicks. Pisses milk (must be rare). Can't go down stairs. 4/10 https://t.co/vXMKrJC81s 2320 Here we see a lone northeastern Cumberbatch. Half ladybug. Only builds with bricks. Very confident with body. 7/10 https://t.co/7LtjBS0GPK 2322 Oh boy what a pup! Sunglasses take this one to the next level. Weirdly folds front legs. Pretty big. 6/10 https://t.co/yECbFrSArM 2323 Here we have an Austrian Pulitzer. Collectors edition. Levitates (?). 7/10 would garden with https://t.co/NMQq6HIglK 2326 This is quite the dog. Gets really excited when not in water. Not very soft tho. Bad at fetch. Can't do tricks. 2/10 https://t.co/aMCTNWO94t 2327 This is a southern Vesuvius bumblegruff. Can drive a truck (wow). Made friends with 5 other nifty dogs (neat). 7/10 https://t.co/LopTBkKa8h 2330 Unique dog here. Very small. Lives in container of Frosted Flakes (?). Short legs. Must be rare 6/10 would still pet https://t.co/XMD9CwjEnM 2334 This is a funny dog. Weird toes. Won't come down. Loves branch. Refuses to eat his food. Hard to cuddle with. 
3/10 https://t.co/IIXis0zta0 2338 Not familiar with this breed. No tail (weird). Only 2 legs. Doesn't bark. Surprisingly quick. Shits eggs. 1/10 https://t.co/Asgdc6kuLX 2342 Here we have a well-established sunblockerspaniel. Lost his other flip-flop. 6/10 not very waterproof https://t.co/3RU6x0vHB7 2349 This is an odd dog. Hard on the outside but loving on the inside. Petting still fun. Doesn't play catch well. 2/10 https://t.co/v5A4vzSDdc 2351 Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq 2352 This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx 2354 This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI
Observation: Low ratings are often given for something comical or ironic. Often the picture is not a picture of a dog. So, low ratings are an indicator, though not a perfect one, that the picture does not contain a dog.
# Inspect tweets rated out of 10 whose numerator is unusually high (> 20).
mask = (df_archive0.rating_denominator == 10) & (df_archive0.rating_numerator > 20)
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for index, text in df_archive0[mask].text.items():
    print(index, text)
979 This is Atticus. He's quite simply America af. 1776/10 https://t.co/GRXwMxLBkh 2074 After so many requests... here you go. Good dogg. 420/10 https://t.co/yfAAo1gdeY
Observation: There are two unusually high ratings in the set. The first is a rating of 1776/10. This is for a dog dressed up in the colors and symbols of the American flag and posted on July 4, the American Independence Day, where 1776 is the famous year of American independence. The rating is clearly non-serious; rather, it is a patriotic gesture. The rating should be removed and replaced with pd.NA.
The second is a rating of 420/10. The score of 420 is a reference to the smoking of marijuana and the picture is not that of an actual dog, but of the rapper "Snoop Dogg". This row should be removed.
# Row 2074 (the 420/10 tweet) is removed from the data set entirely.
df_archive0.drop(index=2074, inplace=True)
# Row 979 (the 1776/10 tweet) stays, but its rating is blanked out.
for rating_column in ('rating_numerator', 'rating_denominator'):
    df_archive0.loc[979, rating_column] = pd.NA
# Re-run the outlier filter; the displayed frame should now be empty.
mask = (df_archive0.rating_numerator > 20) & (df_archive0.rating_denominator == 10)
df_archive0[mask]
| tweet_id | timestamp | text | url_1 | url_2 | name | dog_designation | rating_numerator | rating_denominator | favorite_count | retweet_count | user_id | user_name | user_url | in_reply_to_status_id | in_reply_to_user_id | source_url | source_name |
|---|
# Tweets 1165 and 1202 each contain two ratios and the extraction picked up
# the first one ("4/20", "50/50"); overwrite with the rating stated later
# in the tweet text.
corrected_numerators = {1165: 13.0, 1202: 11.0}
for row_label, numerator in corrected_numerators.items():
    df_archive0.loc[row_label, 'rating_numerator'] = numerator
    df_archive0.loc[row_label, 'rating_denominator'] = 10
# Show the two corrected rows
df_archive0.loc[list(corrected_numerators), :]
| tweet_id | timestamp | text | url_1 | url_2 | name | dog_designation | rating_numerator | rating_denominator | favorite_count | retweet_count | user_id | user_name | user_url | in_reply_to_status_id | in_reply_to_user_id | source_url | source_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1165 | 722974582966214656 | 2016-04-21 02:25:47+00:00 | Happy 4/20 from the squad! 13/10 for all https... | https://twitter.com/dog_rates/status/722974582... | <NA> | <NA> | NaN | 13.0 | 10 | 3926 | 1489 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk | <NA> | <NA> | http://twitter.com/download/iphone | Twitter for iPhone |
| 1202 | 716439118184652801 | 2016-04-03 01:36:11+00:00 | This is Bluebert. He just saw that both #Final... | https://twitter.com/dog_rates/status/716439118... | <NA> | Bluebert | NaN | 11.0 | 10 | 2288 | 200 | 4196983835 | WeRateDogs® | https://t.co/6ytGi24QCk | <NA> | <NA> | http://twitter.com/download/iphone | Twitter for iPhone |
# Give the predictions the same 'string' tweet_id dtype before joining on it
df_predictions0['tweet_id'] = df_predictions0['tweet_id'].astype('string')
# Index both frames by tweet_id and outer-join them, so a tweet present in
# either frame gets a row in the master frame
archive_by_id = df_archive0.set_index('tweet_id')
predictions_by_id = df_predictions0.set_index('tweet_id')
df_master = archive_by_id.join(predictions_by_id, how='outer')
df_master.head()
| timestamp | text | url_1 | url_2 | name | dog_designation | rating_numerator | rating_denominator | favorite_count | retweet_count | ... | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| tweet_id | |||||||||||||||||||||
| 666020888022790149 | 2015-11-15 22:32:08+00:00 | Here we have a Japanese Irish Setter. Lost eye... | https://twitter.com/dog_rates/status/666020888... | <NA> | <NA> | NaN | 8.0 | 10 | 2344 | 443 | ... | 1.0 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
| 666029285002620928 | 2015-11-15 23:05:30+00:00 | This is a western brown Mitsubishi terrier. Up... | https://twitter.com/dog_rates/status/666029285... | <NA> | <NA> | NaN | 7.0 | 10 | 117 | 41 | ... | 1.0 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
| 666033412701032449 | 2015-11-15 23:21:54+00:00 | Here is a very happy pup. Big fan of well-main... | https://twitter.com/dog_rates/status/666033412... | <NA> | <NA> | NaN | 9.0 | 10 | 107 | 39 | ... | 1.0 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
| 666044226329800704 | 2015-11-16 00:04:52+00:00 | This is a purebred Piers Morgan. Loves to Netf... | https://twitter.com/dog_rates/status/666044226... | <NA> | <NA> | NaN | 6.0 | 10 | 259 | 122 | ... | 1.0 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
| 666049248165822465 | 2015-11-16 00:24:50+00:00 | Here we have a 1949 1st generation vulpix. Enj... | https://twitter.com/dog_rates/status/666049248... | <NA> | <NA> | NaN | 5.0 | 10 | 93 | 38 | ... | 1.0 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
5 rows × 28 columns
# Summarize the merged frame: number of rows, per-column non-null counts and dtypes.
df_master.info()
<class 'pandas.core.frame.DataFrame'> Index: 2196 entries, 666020888022790149 to 892420643555336193 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 timestamp 2108 non-null datetime64[ns, UTC] 1 text 2108 non-null string 2 url_1 2108 non-null string 3 url_2 28 non-null string 4 name 1437 non-null string 5 dog_designation 331 non-null category 6 rating_numerator 2106 non-null Float64 7 rating_denominator 2106 non-null Int64 8 favorite_count 2101 non-null Int64 9 retweet_count 2101 non-null Int64 10 user_id 2101 non-null string 11 user_name 2101 non-null string 12 user_url 2101 non-null string 13 in_reply_to_status_id 22 non-null string 14 in_reply_to_user_id 22 non-null string 15 source_url 2108 non-null string 16 source_name 2108 non-null category 17 jpg_url 2075 non-null object 18 img_num 2075 non-null float64 19 p1 2075 non-null object 20 p1_conf 2075 non-null float64 21 p1_dog 2075 non-null object 22 p2 2075 non-null object 23 p2_conf 2075 non-null float64 24 p2_dog 2075 non-null object 25 p3 2075 non-null object 26 p3_conf 2075 non-null float64 27 p3_dog 2075 non-null object dtypes: Float64(1), Int64(3), category(2), datetime64[ns, UTC](1), float64(4), object(7), string(10) memory usage: 476.6+ KB
# Save the master data set. tweet_id is the index of df_master (it was set as
# the join key), so the index must be written out; with index=False the tweet
# ids would be missing from the CSV.
df_master.to_csv('twitter-archive-master.csv', index=True, index_label='tweet_id')
# Earliest and latest tweet timestamps present in the merged data.
df_master.timestamp.min(), df_master.timestamp.max()
(Timestamp('2015-11-15 22:32:08+0000', tz='UTC'),
Timestamp('2017-08-01 16:23:56+0000', tz='UTC'))
# Length of the period covered by the tweets (latest minus earliest timestamp).
df_master.timestamp.max() - df_master.timestamp.min()
Timedelta('624 days 17:51:48')
The time period for this study about "We Rate Dogs" is a period of 624 days from November 2015 until August 2017.
What are the most popular dog names in the posts from "We Rate Dogs" during this time period? We can see below that 'Oliver', 'Lucy', and 'Charlie' are tied for first place with 11 posts each, followed by 'Cooper' with 10.
# Count how often each dog name occurs, most frequent first.
df_master.name.value_counts()
Oliver 11
Lucy 11
Charlie 11
Cooper 10
Tucker 9
..
Rinna 1
Lambeau 1
Timmy 1
Jett 1
Schnitzel 1
Name: name, Length: 963, dtype: Int64
One might expect the retweet_count to be strongly correlated with the favorite_count. We will investigate that to see if that expectation is confirmed.
# Create and clean a DataFrame for this analysis
# Work on an independent copy of just the two count columns, keeping only
# the rows where both counts are present
count_columns = ['favorite_count', 'retweet_count']
df_retweet = df_master[count_columns].copy().dropna()
# Sanity check: neither column should report any missing values now
df_retweet.isna().any()
favorite_count False retweet_count False dtype: bool
# With the missing values gone, convert both count columns to plain integers
for count_column in ('retweet_count', 'favorite_count'):
    df_retweet[count_column] = df_retweet[count_column].astype(int)
df_retweet.info()
<class 'pandas.core.frame.DataFrame'> Index: 2101 entries, 666020888022790149 to 892420643555336193 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 favorite_count 2101 non-null int64 1 retweet_count 2101 non-null int64 dtypes: int64(2) memory usage: 49.2+ KB
# Pairwise correlation matrix of the two count columns
# (DataFrame.corr uses the Pearson method by default).
correlation = df_retweet.corr()
correlation
| favorite_count | retweet_count | |
|---|---|---|
| favorite_count | 1.000000 | 0.925431 |
| retweet_count | 0.925431 | 1.000000 |
# Off-diagonal entry of the 2x2 matrix: favorite_count vs retweet_count
r_value = correlation.iloc[0, 1]
print('The correlation between favorite_count and retweet_count is {}.'.format(r_value))
# For a single predictor, R-squared is the square of the correlation
print('This results in a R-squared value of {}.'.format(r_value ** 2))
The correlation between favorite_count and retweet_count is 0.9254305767391302. This results in a R-squared value of 0.8564217523637192.
With a correlation of 0.925, the two features favorite_count and retweet_count are strongly positively correlated, as one might expect.
We can perform a linear regression with retweet_count as the response and favorite_count as the predictor.
# Ordinary least squares fit of retweet_count on favorite_count.
# The column of ones is passed as a regressor so an intercept term is estimated.
df_retweet['intercept'] = 1
y = df_retweet['retweet_count'].astype(float)  # response
X = df_retweet[['intercept', 'favorite_count']].astype(float)  # design matrix: constant + predictor
model = sm.OLS(y, X)
results = model.fit()
# Last expression of the cell: renders the regression summary table
results.summary()
| Dep. Variable: | retweet_count | R-squared: | 0.856 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.856 |
| Method: | Least Squares | F-statistic: | 1.252e+04 |
| Date: | Fri, 12 Mar 2021 | Prob (F-statistic): | 0.00 |
| Time: | 14:30:48 | Log-Likelihood: | -18472. |
| No. Observations: | 2101 | AIC: | 3.695e+04 |
| Df Residuals: | 2099 | BIC: | 3.696e+04 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -275.9993 | 42.161 | -6.546 | 0.000 | -358.681 | -193.318 |
| favorite_count | 0.3333 | 0.003 | 111.894 | 0.000 | 0.327 | 0.339 |
| Omnibus: | 1762.923 | Durbin-Watson: | 1.262 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 187316.238 |
| Skew: | 3.347 | Prob(JB): | 0.00 |
| Kurtosis: | 48.770 | Cond. No. | 1.72e+04 |
# The parameters -- intercept and slope -- of the linear regression
results.params
intercept -275.999294 favorite_count 0.333294 dtype: float64
# Cross-check the OLS coefficients with a degree-1 least-squares polynomial fit.
# np.polyfit lists the highest power first, so the result is [slope, intercept].
x, y = df_retweet.favorite_count, df_retweet.retweet_count
fit = np.polyfit(x, y, deg=1)
fit
array([ 0.3332938 , -275.99929365])
We can make a scatter plot of retweet_count versus favorite_count, adding in the regression line with parameters calculated above.
# Scatter of the raw counts with the fitted regression line drawn on top
fit_slope, fit_intercept = fit
favorite_values = np.array(x)
df_retweet.plot(kind='scatter', x='favorite_count', y='retweet_count', figsize=(8,6))
plt.plot(favorite_values, fit_slope * favorite_values + fit_intercept, color='red', linewidth=2)
# Annotate the line with its equation (intercept first, then slope)
plt.text(95000, 25000, 'y={:.1f}+{:.2f}*x'.format(fit_intercept, fit_slope), color='red', size=12)
plt.xlabel('Favorite Count', fontsize=14)
plt.ylabel('Retweet Count', fontsize=14)
plt.title('Retweet Count vs Favorite Count', fontsize=18);
# Reference: https://towardsdatascience.com/simple-and-multiple-linear-regression-with-python-c9ab422ec29c
"We Rate Dogs" provides ratings as a ratio with a non-negative numerator and a denominator that is often 10 or some multiple of 10 -- for example "13/10". When the denominator is some multiple of 10, it is almost always because more than one dog is involved in the picture(s). For what follows, we will exclude cases with multiple dogs as best we can by only looking at ratings where the denominator is 10. (This technique is not a perfect filter for just posts about a single dog since sometimes a rating might be given as "12/10 for each one". But at least in that case the rating is clearly meant to apply to each individual rather than to the group in aggregate.) By selecting posts only where the denominator is 10, we can summarize the rating easily by simply looking at the rating numerator alone, which we will do.
(df_archive0.rating_denominator == 10).sum(), (df_master.rating_denominator == 10).sum()
(2094, 2094)
# Restrict to ratings out of 10 and keep only rows where both values are present
mask_denom10 = df_master['rating_denominator'] == 10
df_favrat = df_master.loc[mask_denom10, ['favorite_count', 'rating_numerator']].dropna()
df_favrat.head()
| favorite_count | rating_numerator | |
|---|---|---|
| tweet_id | ||
| 666020888022790149 | 2344 | 8.0 |
| 666029285002620928 | 117 | 7.0 |
| 666033412701032449 | 107 | 9.0 |
| 666044226329800704 | 259 | 6.0 |
| 666049248165822465 | 93 | 5.0 |
df_favrat.info()
<class 'pandas.core.frame.DataFrame'> Index: 2087 entries, 666020888022790149 to 892420643555336193 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 favorite_count 2087 non-null Int64 1 rating_numerator 2087 non-null Float64 dtypes: Float64(1), Int64(1) memory usage: 53.0+ KB
df_favrat.describe()
| favorite_count | rating_numerator | |
|---|---|---|
| count | 2087.000000 | 2087.000000 |
| mean | 8034.851461 | 10.611778 |
| std | 11706.983395 | 2.163877 |
| min | 69.000000 | 0.000000 |
| 25% | 1770.500000 | 10.000000 |
| 50% | 3651.000000 | 11.000000 |
| 75% | 10072.000000 | 12.000000 |
| max | 150433.000000 | 14.000000 |
The mean rating is 10.6 and the median is 11. The bulk of the ratings, the three upper quartiles, are greater than or equal to 10. This is also evident in the histogram below which shows a left-skewed distribution.
# Distribution of the rating numerators for the x/10 ratings
ax = df_favrat['rating_numerator'].hist(bins='auto', figsize=(7,5))
ax.set_xlabel('Rating', fontsize=14)
ax.set_ylabel('Counts', fontsize=14)
ax.set_title('Histogram of Ratings', fontsize=18);
We can plot favorite_count versus rating_numerator to see whether there is any apparent relationship. Below we see that higher favorite counts are associated with higher ratings.
# Favorite count plotted against the rating, one point per post
ax = df_favrat.plot(kind='scatter', x='rating_numerator', y='favorite_count', figsize=(7,5))
ax.set_xlabel('Rating', fontsize=14)
ax.set_ylabel('Favorite Count', fontsize=14)
ax.set_title('Favorite Count versus Rating', fontsize=18);
We see that the value of 10 bifurcates the ratings into two groups: less than 10 and greater than or equal to 10. We saw previously that the number of ratings increases substantially at 10. Plus, we see here that higher favorite counts only occur for ratings greater than or equal to 10. So, in examining favorite count versus rating further, we will divide the data according to this natural bifurcation.
# Lower side of the split: ratings strictly below 10
is_low_rating = df_favrat['rating_numerator'] < 10
df_favrat_low = df_favrat[is_low_rating]
df_favrat_low.head()
| favorite_count | rating_numerator | |
|---|---|---|
| tweet_id | ||
| 666020888022790149 | 2344 | 8.0 |
| 666029285002620928 | 117 | 7.0 |
| 666033412701032449 | 107 | 9.0 |
| 666044226329800704 | 259 | 6.0 |
| 666049248165822465 | 93 | 5.0 |
# One box per distinct rating value (grouped via `by`); the boxes summarize
# favorite_count, the only other column in df_favrat_low.
df_favrat_low.boxplot(by='rating_numerator', figsize=(7,5))
plt.xlabel('Rating', fontsize=14)
plt.ylabel('Favorite Count', fontsize=14)
# Trailing semicolon keeps the notebook from echoing the returned object
plt.title('Favorite Count versus Rating (<10)', fontsize=12);
# Show the value counts per rating, sorted by the rating value
df_favrat_low.rating_numerator.value_counts().sort_index()
0.00 2 1.00 4 2.00 9 3.00 19 4.00 15 5.00 33 6.00 32 7.00 51 8.00 98 9.00 154 9.75 1 Name: rating_numerator, dtype: Int64
# Upper side of the split: ratings of 10 and above
is_high_rating = df_favrat['rating_numerator'] >= 10
df_favrat_high = df_favrat[is_high_rating]
df_favrat_high.head()
| favorite_count | rating_numerator | |
|---|---|---|
| tweet_id | ||
| 666050758794694657 | 119 | 10.0 |
| 666055525042405380 | 394 | 10.0 |
| 666063827256086533 | 431 | 10.0 |
| 666073100786774016 | 280 | 10.0 |
| 666102155909144576 | 69 | 11.0 |
# One box per distinct rating value (grouped via `by`); the boxes summarize
# favorite_count, the only other column in df_favrat_high.
df_favrat_high.boxplot(by='rating_numerator', figsize=(7,5))
plt.xlabel('Rating', fontsize=14)
plt.ylabel('Favorite Count', fontsize=14)
# Trailing semicolon keeps the notebook from echoing the returned object
plt.title('Favorite Count versus Rating (>= 10)', fontsize=12);
Nearly all of the favorite counts for the low ratings (< 10) we saw were less than 20000 and most of them were less than 5000. That range of counts only rises to the first gradation above 0 on this plot for the high ratings (>= 10). The high ratings see a much higher range of favorite counts than the low ratings.
# Show the value counts per rating, sorted by the rating value
df_favrat_high.rating_numerator.value_counts().sort_index()
10.00 435 11.00 417 11.26 1 11.27 1 12.00 482 13.00 291 13.50 1 14.00 41 Name: rating_numerator, dtype: Int64
It was observed informally that low ratings sometimes indicate that a picture does not contain a dog but rather a different kind of animal (like a hedgehog, for example). We will investigate using ratings as an indicator for whether the post concerns a dog. The question we would like to investigate here is, "Does 'We Rate Dogs' use low ratings to indicate that the post is not about a dog?" Or more precisely, "How strong of an indicator is the rating for the presence of a dog in the post?" To do this, we would need to know which posts concern a dog and which do not. Apart from recording that information for each post by means of human observation (a tedious task), we will use the machine learning (ML) predictions data provided to us as a benchmark for whether a post concerns a dog. This is an imperfect benchmark, since the prediction data itself should be tested for reliability. So, the more accurate way of looking at this investigation is as a comparison of using the ratings as dog predictors with the machine learning prediction data provided. The ML prediction data provides three ranked predictions -- first, second, and third in confidence. We will compare the ratings as dog predictors with (1) the top ranked predictions of the ML algorithm as well as with (2) the conjunction of the ranked predictions. The latter case is a conservative prediction and will only predict a dog when all three ranked predictions say there is a dog (i.e., the conjunction of the predictions); otherwise, the default will be that there is not a dog.
# Gather the rating and the three ML dog flags, limited to x/10 ratings with no gaps
df_predplus = df_master[['rating_numerator', 'rating_denominator', 'p1_dog', 'p2_dog', 'p3_dog']].copy()
df_predplus = df_predplus[df_predplus['rating_denominator'] == 10].dropna()
# One cast for every column: float rating for statsmodels.api, and 1/0 in place
# of True/False on the dog flags for the logistic regression
df_predplus = df_predplus.astype({
    'rating_numerator': float,
    'rating_denominator': int,
    'p1_dog': int,
    'p2_dog': int,
    'p3_dog': int,
})
# p123_dog (the conjunction) is 1 only when all three ranked predictions are 1
df_predplus['p123_dog'] = df_predplus['p1_dog'] * df_predplus['p2_dog'] * df_predplus['p3_dog']
# Constant column used as the intercept in the upcoming logistic regression
df_predplus['intercept'] = 1
df_predplus.info()
<class 'pandas.core.frame.DataFrame'> Index: 1973 entries, 666020888022790149 to 892420643555336193 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 rating_numerator 1973 non-null float64 1 rating_denominator 1973 non-null int64 2 p1_dog 1973 non-null int64 3 p2_dog 1973 non-null int64 4 p3_dog 1973 non-null int64 5 p123_dog 1973 non-null int64 6 intercept 1973 non-null int64 dtypes: float64(1), int64(6) memory usage: 123.3+ KB
df_predplus.head()
| rating_numerator | rating_denominator | p1_dog | p2_dog | p3_dog | p123_dog | intercept | |
|---|---|---|---|---|---|---|---|
| tweet_id | |||||||
| 666020888022790149 | 8.0 | 10 | 1 | 1 | 1 | 1 | 1 |
| 666029285002620928 | 7.0 | 10 | 1 | 1 | 1 | 1 | 1 |
| 666033412701032449 | 9.0 | 10 | 1 | 1 | 1 | 1 | 1 |
| 666044226329800704 | 6.0 | 10 | 1 | 1 | 1 | 1 | 1 |
| 666049248165822465 | 5.0 | 10 | 1 | 1 | 1 | 1 | 1 |
print('From {} posts with predictions.'.format(len(df_predplus)))
print('The ML algorithm\'s top ranked prediction yielded {} dogs.'.format(df_predplus.p1_dog.sum()))
print('The conjunction of ranked predictions yielded {} dogs.'.format(df_predplus.p123_dog.sum()))
From 1973 posts with predictions. The ML algorithm's top ranked prediction yielded 1461 dogs. The conjunction of ranked predictions yielded 1189 dogs.
# Count posts for every (rating, top-ranked dog prediction) pair.
# The counts Series is named before it becomes a frame so the column is
# labelled 'count'. The earlier `data.rename(columns={0: 'count'})` call threw
# away its return value (rename is not in-place), so nothing was renamed.
data = (
    df_predplus[['rating_numerator', 'p1_dog']]
    .value_counts()
    .sort_index()
    .rename('count')
    .to_frame()
)
# Pivot the p1_dog level (0/1) into columns so each rating gets one stacked bar
data.unstack().plot(kind='bar', stacked=True, figsize=(7,5))
plt.xlabel('Rating', fontsize=14)
plt.ylabel('Counts', fontsize=14)
plt.title('Top Ranked ML Predictions of "Dog" versus "No Dog"\n at each Rating', fontsize=18)
plt.legend(labels=['0: Predict "No Dog"', '1: Predict "Dog"']);
# Reference: https://stackoverflow.com/questions/29525120/pandas-creating-a-histogram-from-string-counts
# Reference: https://stackoverflow.com/questions/34248741/plotting-pandas-multiindex-bar-chart
In the plot, the proportion of orange to blue per bar increases roughly as the ratings increase. This means that the proportion of ML predictions of "Dog" to "No Dog" increases roughly as the ratings increase. (Or, from the opposite perspective, lower ratings are stronger indicators of "No Dog".) This increase is particularly strong for ratings less than 10 and is less so (it appears to roughly flatten out) for ratings greater than or equal to 10.
This relationship suggests that the dog ratings themselves can be viewed somewhat as predictors of "Dog" versus "No Dog". We will examine this using logistic regression over a few cases:
1) Rating as a predictor of Top Ranked ML Dog Predictions
2) Rating as a predictor of the Conjunction of ML Dog Predictions
3) Rating as predictor of Top Ranked ML Dog Predictions for low ratings (< 10)
4) Rating as predictor of Top Ranked ML Dog Predictions for high ratings (>= 10)
#### Logistic Regression: Rating as predictor of Top Ranked ML Dog Predictions
# Logistic regression of the top-ranked dog flag on the rating plus a constant
model = sm.Logit(
    endog=df_predplus['p1_dog'],
    exog=df_predplus[['intercept', 'rating_numerator']],
)
result = model.fit()  # maximum-likelihood fit
result.summary()
Optimization terminated successfully.
Current function value: 0.532925
Iterations 5
| Dep. Variable: | p1_dog | No. Observations: | 1973 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 1971 |
| Method: | MLE | Df Model: | 1 |
| Date: | Fri, 12 Mar 2021 | Pseudo R-squ.: | 0.06919 |
| Time: | 14:30:51 | Log-Likelihood: | -1051.5 |
| converged: | True | LL-Null: | -1129.6 |
| Covariance Type: | nonrobust | LLR p-value: | 7.252e-36 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -1.9336 | 0.255 | -7.594 | 0.000 | -2.433 | -1.435 |
| rating_numerator | 0.2894 | 0.024 | 11.814 | 0.000 | 0.241 | 0.337 |
result.params
intercept -1.933619 rating_numerator 0.289381 dtype: float64
np.exp(result.params)
intercept 0.144624 rating_numerator 1.335600 dtype: float64
Interpretation
The resulting logistic model is:
$log_e \Big( \frac{p_i}{1 - p_i} \Big) = -1.934 + 0.289\times \text{rating}$.
Where $ \frac{p_i}{1 - p_i} $ is the odds of the ML algorithm predicting a "Dog" given a particular $\text{rating}$, the odds ratio is
$\text{Odds Ratio} = \frac{\text{Odds given rating = x + 1}}{\text{Odds given rating = x}}$.
It can be shown that
$\text{Odds Ratio} = exp(\text{coeff. of rating})$
so that in this case the odds ratio is $exp(0.289) = 1.34$ which indicates that we expect a multiplicative change of 1.34 in the odds for an increase in rating by 1. In other words, the model predicts a 34% increase in the odds of the ML algorithm predicting a "Dog" for each increase in rating by 1.
# Logistic regression of the conjunction flag (p123_dog) on the rating plus a constant
model = sm.Logit(
    endog=df_predplus['p123_dog'],
    exog=df_predplus[['intercept', 'rating_numerator']],
)
result = model.fit()  # maximum-likelihood fit
result.summary()
Optimization terminated successfully.
Current function value: 0.646417
Iterations 5
| Dep. Variable: | p123_dog | No. Observations: | 1973 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 1971 |
| Method: | MLE | Df Model: | 1 |
| Date: | Fri, 12 Mar 2021 | Pseudo R-squ.: | 0.03797 |
| Time: | 14:30:51 | Log-Likelihood: | -1275.4 |
| converged: | True | LL-Null: | -1325.7 |
| Covariance Type: | nonrobust | LLR p-value: | 1.086e-23 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -1.8763 | 0.245 | -7.652 | 0.000 | -2.357 | -1.396 |
| rating_numerator | 0.2183 | 0.023 | 9.530 | 0.000 | 0.173 | 0.263 |
result.params
intercept -1.876271 rating_numerator 0.218282 dtype: float64
np.exp(result.params)
intercept 0.153160 rating_numerator 1.243937 dtype: float64
Interpretation
The resulting logistic model is:
$log_e \Big( \frac{p_i}{1 - p_i} \Big) = -1.876 + 0.218\times \text{rating}$.
Where $ \frac{p_i}{1 - p_i} $ is the odds of the ML algorithm predicting a "Dog" given a particular $\text{rating}$, the odds ratio is
$\text{Odds Ratio} = \frac{\text{Odds given rating = x + 1}}{\text{Odds given rating = x}}$.
It can be shown that
$\text{Odds Ratio} = exp(\text{coeff. of rating})$
so that in this case the odds ratio is $exp(0.218) = 1.24$ which indicates that we expect a multiplicative change of 1.24 in the odds for an increase in rating by 1. In other words, the model predicts a 24% increase in the odds of the ML algorithm predicting a "Dog" for each increase in rating by 1.
The odds ratio for the Top Ranked ML Prediction was 1.34 and, in this case, the odds ratio for the Conjunction of ML Predictions was 1.24. So, whichever ML prediction technique we use, the "We Rate Dogs" rating itself seems to track with the ML algorithm in a loose way.
Next we look at how the ratings compare when partitioned into low (< 10) and high (>= 10). We expect that the ratings will be a stronger indicator over the low partition than the high.
df_predplus_low = df_predplus[df_predplus.rating_numerator < 10].copy()
df_predplus_low.shape
(411, 7)
print(df_predplus_low.p1_dog.sum(), len(df_predplus_low) - df_predplus_low.p1_dog.sum() )
df_predplus_low.p1_dog.mean()
220 191
0.5352798053527981
df_predplus_low[['rating_numerator', 'p1_dog']].value_counts().sort_index()
rating_numerator p1_dog
0.00 0 2
1.00 0 4
2.00 0 8
1 1
3.00 0 16
1 3
4.00 0 11
1 4
5.00 0 23
1 9
6.00 0 20
1 12
7.00 0 27
1 24
8.00 0 37
1 58
9.00 0 43
1 108
9.75 1 1
dtype: int64
# Count low-rating posts for every (rating, top-ranked dog prediction) pair.
# The counts Series is named before it becomes a frame so the column is
# labelled 'count'. The earlier `data.rename(columns={0: 'count'})` call threw
# away its return value (rename is not in-place), so nothing was renamed.
data = (
    df_predplus_low[['rating_numerator', 'p1_dog']]
    .value_counts()
    .sort_index()
    .rename('count')
    .to_frame()
)
# Pivot the p1_dog level (0/1) into columns so each rating gets one stacked bar
data.unstack().plot(kind='bar', stacked=True)
plt.xlabel('Rating', fontsize=14)
plt.ylabel('Counts', fontsize=14)
plt.title('Top Ranked ML Predictions of "Dog" versus "No Dog"\n at Low Ratings (< 10)', fontsize=18)
plt.legend(labels=['0: Predict "No Dog"', '1: Predict "Dog"']);
# Same logistic regression as before, fitted on the low-rating (< 10) subset only
model = sm.Logit(
    endog=df_predplus_low['p1_dog'],
    exog=df_predplus_low[['intercept', 'rating_numerator']],
)
result = model.fit()  # maximum-likelihood fit
result.summary()
Optimization terminated successfully.
Current function value: 0.608730
Iterations 6
| Dep. Variable: | p1_dog | No. Observations: | 411 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 409 |
| Method: | MLE | Df Model: | 1 |
| Date: | Fri, 12 Mar 2021 | Pseudo R-squ.: | 0.1186 |
| Time: | 14:30:51 | Log-Likelihood: | -250.19 |
| converged: | True | LL-Null: | -283.86 |
| Covariance Type: | nonrobust | LLR p-value: | 2.282e-16 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -3.1668 | 0.484 | -6.538 | 0.000 | -4.116 | -2.217 |
| rating_numerator | 0.4517 | 0.063 | 7.162 | 0.000 | 0.328 | 0.575 |
result.params
intercept -3.166793 rating_numerator 0.451743 dtype: float64
np.exp(result.params)
intercept 0.042139 rating_numerator 1.571049 dtype: float64
Interpretation
The resulting logistic model is:
$log_e \Big( \frac{p_i}{1 - p_i} \Big) = -3.167 + 0.452\times \text{rating}$.
Where $ \frac{p_i}{1 - p_i} $ is the odds of the ML algorithm predicting a "Dog" given a particular $\text{rating}$, the odds ratio is
$\text{Odds Ratio} = \frac{\text{Odds given rating = x + 1}}{\text{Odds given rating = x}}$.
It can be shown that
$\text{Odds Ratio} = exp(\text{coeff. of rating})$
so that in this case the odds ratio is $exp(0.452) = 1.57$ which indicates that we expect a multiplicative change of 1.57 in the odds for an increase in rating by 1. In other words, the model predicts a 57% increase in the odds of the ML algorithm predicting a "Dog" for each increase in rating by 1.
df_predplus_high = df_predplus[df_predplus.rating_numerator >= 10].copy()
df_predplus_high.shape
(1562, 7)
print(df_predplus_high.p1_dog.sum(), len(df_predplus_high) - df_predplus_high.p1_dog.sum() )
df_predplus_high.p1_dog.mean()
1241 321
0.79449423815621
df_predplus_high[['rating_numerator', 'p1_dog']].value_counts().sort_index()
rating_numerator p1_dog
10.00 0 101
1 318
11.00 0 89
1 307
11.26 1 1
11.27 1 1
12.00 0 70
1 376
13.00 0 47
1 215
13.50 1 1
14.00 0 14
1 22
dtype: int64
# Count high-rating posts for every (rating, top-ranked dog prediction) pair.
# The counts Series is named before it becomes a frame so the column is
# labelled 'count'. The earlier `data.rename(columns={0: 'count'})` call threw
# away its return value (rename is not in-place), so nothing was renamed.
data = (
    df_predplus_high[['rating_numerator', 'p1_dog']]
    .value_counts()
    .sort_index()
    .rename('count')
    .to_frame()
)
# Pivot the p1_dog level (0/1) into columns so each rating gets one stacked bar
data.unstack().plot(kind='bar', stacked=True)
plt.xlabel('Rating', fontsize=14)
plt.ylabel('Counts', fontsize=14)
plt.title('Top Ranked ML Predictions of "Dog" versus "No Dog"\n at High Ratings (>= 10)', fontsize=18)
plt.legend(labels=['0: Predict "No Dog"', '1: Predict "Dog"']);
# Same logistic regression as before, fitted on the high-rating (>= 10) subset only
model = sm.Logit(
    endog=df_predplus_high['p1_dog'],
    exog=df_predplus_high[['intercept', 'rating_numerator']],
)
result = model.fit()  # maximum-likelihood fit
result.summary()
Optimization terminated successfully.
Current function value: 0.506972
Iterations 5
| Dep. Variable: | p1_dog | No. Observations: | 1562 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 1560 |
| Method: | MLE | Df Model: | 1 |
| Date: | Fri, 12 Mar 2021 | Pseudo R-squ.: | 0.001907 |
| Time: | 14:30:51 | Log-Likelihood: | -791.89 |
| converged: | True | LL-Null: | -793.40 |
| Covariance Type: | nonrobust | LLR p-value: | 0.08193 |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 0.2386 | 0.644 | 0.371 | 0.711 | -1.023 | 1.500 |
| rating_numerator | 0.0978 | 0.056 | 1.733 | 0.083 | -0.013 | 0.208 |
result.params
intercept 0.238590 rating_numerator 0.097793 dtype: float64
np.exp(result.params)
intercept 1.269457 rating_numerator 1.102735 dtype: float64
Interpretation
The resulting logistic model is:
$log_e \Big( \frac{p_i}{1 - p_i} \Big) = 0.239 + 0.0978\times \text{rating}$.
Where $ \frac{p_i}{1 - p_i} $ is the odds of the ML algorithm predicting a "Dog" given a particular $\text{rating}$, the odds ratio is
$\text{Odds Ratio} = \frac{\text{Odds given rating = x + 1}}{\text{Odds given rating = x}}$.
It can be shown that
$\text{Odds Ratio} = exp(\text{coeff. of rating})$
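so that in this case the odds ratio is $exp(0.0978) = 1.10$ which indicates that we expect a multiplicative change of 1.10 in the odds for an increase in rating by 1. In other words, the model predicts a 10% increase in the odds of the ML algorithm predicting a "Dog" for each increase in rating by 1. Note, however, that this coefficient is not statistically significant at the 5% level: its p-value is 0.083 and its 95% confidence interval, [-0.013, 0.208], includes 0, so for the high ratings the data do not clearly distinguish this effect from no effect at all.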
So, when comparing the use of rating as a predictor of the ML algorithm's top ranked prediction providing a verdict of "Dog", the lower ratings yield an odds ratio of 1.57 as compared to an odds ratio of 1.10 for the upper ratings. At the lower ratings, differences in the ratings yield more information about whether there is a dog or not as the subject of the post (taking the ML algorithm's predictions as a baseline).