Menu
Cyaxares presents...
BBOYT
(Beneath the Bottom Of YouTube)
It's not about the videos... It's the comments!
For those wondering how these chat transcripts are captured, here is the python/selenium code
"""chat-transcript.py: record a YouTube live chat during broadcast.

Used to record the live chat during broadcast. This script requires
Python, Selenium, pyvirtualdisplay, the Chrome browser and chromedriver.
Output goes to standard out.

Usage:
    chat-transcript.py cycles interval youtube-video-id

    cycles            number of queries to the pop-up chat page before
                      ending the script
    interval          interval in seconds between queries
    youtube-video-id  the ID generated by YouTube for the live stream video

Each chat message is printed as one line:

    timestamp [author] mark message deleted-state link

where ``mark`` is "(*) " for a moderator and "(**) " for the channel owner.
Square brackets inside the message are turned into parentheses so that the
only "[...]" on a line is the author name.

The code is written to run unchanged on Python 2 and Python 3.
"""

import datetime
import sys
import time
import traceback

USAGE = "Usage: chat-transcript.py cycles interval youtube-video-id"
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
CHAT_URL = "https://www.youtube.com/live_chat?v="
WATCH_URL = "https://www.youtube.com/watch?v="
CHAT_ITEMS_XPATH = "//div[@id='items']//yt-live-chat-text-message-renderer"
ALL_MESSAGES_OPTION = "Live chat\nAll messages are visible"
PAID_MESSAGE_CLASSES = (
    "style-scope yt-live-chat-ticker-paid-message-item-renderer",
    "style-scope yt-live-chat-paid-message-renderer",
)
# The chat is considered finished once the newest message id has stayed the
# same for more than this many consecutive queries.
MAX_UNCHANGED_POLLS = 25


def to_ascii(text):
    """Return *text* with every non-ASCII character dropped.

    ``None`` (a missing attribute) is treated as the empty string. The result
    is a text string on both Python 2 and Python 3, so it can be concatenated
    and printed without further encoding.
    """
    if text is None:
        return ""
    return text.encode("ascii", "ignore").decode("ascii")


def sanitize(text):
    """Return *text* as ASCII with square brackets replaced by parentheses."""
    return to_ascii(text).replace("[", "(").replace("]", ")")


def participant_mark(author_class):
    """Return the transcript marker for an author element's ``class`` value.

    Only the first space-separated token of the class is inspected:
    "moderator" gives "(*) ", "owner" gives "(**) ", anything else "".
    """
    participant_type = (author_class or "").split(" ", 1)[0]
    if participant_type == "owner":
        return "(**) "
    if participant_type == "moderator":
        return "(*) "
    return ""


def format_chat_line(timestamp, author, mark, message, deleted, href):
    """Build one transcript line from already-sanitized pieces.

    *href* is either "" or a link prefixed with a single space; *deleted* is
    "" for a normal message.
    """
    return "%s [%s] %s%s %s%s" % (timestamp, author, mark, message, deleted, href)


def format_end_line(video_id, end_reason, when):
    """Build the final status line printed when recording stops."""
    return "[ %s ] ~ Ended with %s %s" % (
        video_id, end_reason, when.strftime(TIME_FORMAT))


def parse_args(argv):
    """Return ``(cycles, interval, video_id)`` parsed from *argv*.

    *argv* has the shape of ``sys.argv``. Extra trailing arguments are
    ignored. On missing or non-integer arguments the usage text is written to
    standard error and the process exits with status 2, before any browser is
    started.
    """
    if len(argv) < 4:
        sys.stderr.write(USAGE + "\n")
        sys.exit(2)
    try:
        cycles = int(argv[1])
        interval = int(argv[2])
    except ValueError:
        sys.stderr.write(USAGE + "\n")
        sys.exit(2)
    return cycles, interval, argv[3]


def main(argv=None):
    """Open the pop-up chat of a live stream and print new messages.

    Runs until *cycles* queries have been made, the chat stops changing, or
    the user presses Ctrl+C. The browser and the virtual display are always
    shut down on the way out.
    """
    cycles, interval, video_id = parse_args(sys.argv if argv is None else argv)

    # Third-party imports are deferred so that the formatting helpers above
    # can be imported without a browser stack installed.
    from pyvirtualdisplay import Display
    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    def read_message(item):
        """Return the transcript line for one chat element, or None to skip it."""
        if item.get_attribute("class") in PAID_MESSAGE_CLASSES:
            return None
        author = item.find_element(By.ID, "author-name")
        stamp = item.find_element(By.ID, "timestamp")
        message = item.find_element(By.ID, "message")
        message_text = sanitize(message.text)
        try:
            url = message.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
        except NoSuchElementException:
            url = None  # the message simply contains no link
        href = "" if url is None else " " + to_ascii(url)
        deleted = item.find_element(By.ID, "deleted-state").text
        deleted_text = ""
        if deleted != "":
            # For a deleted message the raw markup of the message element is
            # recorded in place of its visible text.
            message_text = sanitize(message.get_attribute("innerHTML"))
            deleted_text = sanitize(deleted)
        return format_chat_line(
            to_ascii(stamp.get_attribute("innerHTML")),
            to_ascii(author.text),
            participant_mark(author.get_attribute("class")),
            message_text,
            deleted_text,
            href,
        )

    display = Display(visible=0, size=(800, 700))
    display.start()
    driver = None
    try:
        print("Current date and time: "
              + datetime.datetime.now().strftime(TIME_FORMAT))
        driver = webdriver.Chrome()  # chromedriver is searched for on PATH
        driver.get(CHAT_URL + video_id)
        print(CHAT_URL + video_id)
        print(WATCH_URL + video_id)
        sys.stdout.flush()
        time.sleep(5)

        # Switch the chat from "Top chat" to "All messages".
        button = WebDriverWait(driver, 120).until(
            EC.presence_of_element_located((By.TAG_NAME, "paper-button")))
        ActionChains(driver).move_to_element(button).click(button).perform()
        menu = WebDriverWait(driver, 120).until(
            EC.presence_of_element_located((By.ID, "menu")))
        for option in menu.find_elements(By.TAG_NAME, "paper-item"):
            time.sleep(5)
            if option.text == ALL_MESSAGES_OPTION:
                option.click()
                break
        time.sleep(5)

        end_reason = "timeout:"
        last_id = ""
        unchanged_polls = 0
        first_batch_done = False
        polls = 0
        try:
            while polls < cycles:
                if unchanged_polls > MAX_UNCHANGED_POLLS:
                    end_reason = "chat is over:"
                    break
                items = driver.find_elements(By.XPATH, CHAT_ITEMS_XPATH)
                if not items:
                    # Nothing has been posted yet; waiting for the first
                    # messages does not consume a cycle.
                    time.sleep(5)
                    continue
                try:
                    item_ids = [item.get_attribute("id") for item in items]
                    # Number of leading items already printed by the previous
                    # query (everything up to and including last_id).
                    already_seen = 0
                    for position, item_id in enumerate(item_ids, 1):
                        if item_id == last_id:
                            already_seen = position
                    newest_id = item_ids[-1]
                    if newest_id == last_id:
                        unchanged_polls += 1
                    else:
                        unchanged_polls = 0
                    last_id = newest_id

                    # The very first batch is printed in full.
                    skip = already_seen if first_batch_done else 0
                    first_batch_done = True
                    for item in items[skip:]:
                        line = read_message(item)
                        if line is not None:
                            print(line)
                except Exception:
                    # Typically an element that vanished between the query and
                    # the read. Log it and carry on with the next cycle; the
                    # sleep below keeps a persistent error from spinning.
                    print("Exception caught")
                    traceback.print_exc()
                sys.stdout.flush()
                time.sleep(interval)
                polls += 1
        except KeyboardInterrupt:
            # Also covers Ctrl+C pressed while sleeping between queries.
            end_reason = "Ctrl+C!"

        print(format_end_line(video_id, end_reason, datetime.datetime.now()))
        sys.stdout.flush()
        time.sleep(5)
    finally:
        if driver is not None:
            driver.quit()
        display.stop()


if __name__ == "__main__":
    main()
0 Comments
|
Cyaxares
The Good Ruler Categories
All
|