barrpet11
New member
Wrote a shitty python3 script for Sheep. May be useful to someone else in the future.
Also of note: the login sends an MD5 hash of your password over regular (unencrypted) HTTP. Hope no one is using their password from here anywhere else.
Code:
# ogboards.com PM scraper
# Written by barrpet11
# Last updated 2015-08-18 04:20:50
# INSTRUCTIONS
# Set the `username` and `password` variables below to your ogboards credentials
# Run the script and look for the output file in the same directory
import sys
import hashlib
import requests
import re
import csv
# ogboards.com settings
# Root URL that every forum request below is built from
BASE_URL = 'http://www.ogboards.com/forums/'
# Account credentials -- fill these in before running the script
username = ''
password = ''
# Open a session so that cookies set by the login carry over to later requests
session = requests.Session()

# The form is sent the MD5 hex digest of the password in two fields, while
# the plain-text password field is left empty
password_md5 = hashlib.md5(password.encode()).hexdigest()
login_form = {
    'vb_login_username': username,
    'vb_login_password': '',
    'vb_login_md5password': password_md5,
    'vb_login_md5password_utf': password_md5,
    'cookieuser': 1,
    'do': 'login',
    's': '',
    'securitytoken': 'guest',
}
login_response = session.post(BASE_URL + 'login.php?do=login', login_form)

# Abort on an HTTP error status
login_response.raise_for_status()

# A successful login page is expected to contain this phrase
if 'Thank you for logging in' not in login_response.text:
    sys.exit('Login unsuccessful.')
# Download all PMs as CSV, using the session that holds the login cookies
pm_response = session.get(
    BASE_URL + 'private.php?do=downloadpm&dowhat=csv',
    headers={'Referer': 'http://www.ogboards.com/forums/private.php'},
)

# Exit on request error (this must check the PM download response itself,
# not the earlier login response)
pm_response.raise_for_status()

# Check for PM download success: the CSV should contain these header columns
if 'Date,Folder' not in pm_response.text:
    sys.exit('PM download unsuccessful.')

# Save the PM text to file. newline='' writes the text exactly as received;
# without it, Windows would expand any '\r\n' in the download to '\r\r\n'.
# The context manager guarantees the file is flushed and closed before it is
# read back later in the script.
with open('pm_download.csv', 'w', newline='') as pm_file:
    pm_file.write(pm_response.text)
################################################################################
# SHEEP SPECIFIC
################################################################################
# Process the PMs
# NOTE
# Quick-and-dirty parsing, but it should work well enough:
#   - Leading numbering is removed from entries:
#     '1. example' and '1) example' become 'example'
#   - Every non-empty line of a message is taken as an answer (even if there
#     are more than 12), so entries where people added extra lines will have
#     to be corrected manually
# TODO: make the answer extraction more robust

# One or more carriage returns followed by a newline: splits a message body
# into lines regardless of the line-ending style
regx_lines = re.compile(r'\r*\n')
# A 1-2 digit number, then '.' or ')', then spaces/tabs at the start of a line
regx_nums = re.compile(r'^[0-9]{1,2}[.)]+[\t ]+', re.MULTILINE)

# Collected poll rows: [timestamp, sender, answer, answer, ...]
ret = []
with open('pm_download.csv', newline='') as f:
    reader = csv.reader(f)
    for pm in reader:
        # Columns used: 0 = date, 1 = folder, 3 = sender, 5 = message body.
        # Skip blank or truncated rows instead of failing with IndexError, and
        # skip everything outside the Inbox (this also drops the header row).
        if len(pm) < 6 or pm[1] != 'Inbox':
            continue
        pm_timestamp = pm[0]  # TODO: check date/time (last 3/4/5 days?)
        pm_from = pm[3]
        # Strip the leading numbering, split into lines, drop empty lines
        pm_msg = regx_nums.sub('', pm[5])
        pm_items = [item for item in regx_lines.split(pm_msg) if item]
        # Only messages with at least 12 answer lines count as poll entries
        if len(pm_items) >= 12:
            ret.append([pm_timestamp, pm_from, *pm_items])
# Write the collected poll rows to CSV, one row per poll entry.
# newline='' is what the csv module expects for files passed to csv.writer
# (otherwise an extra '\r' is written on Windows), and the context manager
# makes sure the file is flushed and closed before success is reported.
with open('sheep_poll.csv', 'w', newline='') as out_file:
    wr = csv.writer(out_file, quoting=csv.QUOTE_ALL)
    wr.writerows(ret)
print('Success!\nPolling saved to sheep_poll.csv.')