import scrapy
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

def click_n_wait(driver, button, timeout=10):
    """Click *button*, then block until the page source changes.

    Takes a snapshot of ``driver.page_source`` before clicking and waits up
    to *timeout* seconds for the source to differ from that snapshot.
    Deliberately best-effort: a timeout or any Selenium/instrumentation
    error is swallowed so the caller can continue scraping.
    """
    snapshot = driver.page_source
    button.click()
    try:
        WebDriverWait(driver, timeout).until(
            lambda drv: drv.page_source != snapshot)
    except Exception:  # timeout or instrumentation error — intentionally ignored
        pass

class PoliticoSpider(scrapy.Spider):
    """Crawl Politico's November 2015 search results.

    For each story URL found on a results page, load it in a Selenium-driven
    Firefox, expand the comments section, and save the rendered page source
    to a local file named after the last path segment of the URL. Follows
    "Next page" links to paginate through the search results.
    """

    name = 'politicospider'
    start_urls = ['http://www.politico.com/search?start=11%2F01%2F2015&end=11%2F30%2F2015&s=newest']

    def __init__(self, *args, **kwargs):
        # Forward arguments so Scrapy's own Spider initialisation still runs
        # (the original skipped super().__init__, dropping spider kwargs).
        super(PoliticoSpider, self).__init__(*args, **kwargs)
        self.download_delay = 1  # politeness: 1 s between Scrapy requests
        self.selenium_driver = webdriver.Firefox()
        time.sleep(2)  # give the browser a moment to finish starting up

    def closed(self, reason):
        # Scrapy calls closed() when the spider finishes; quit the browser so
        # the Firefox process is not leaked. Best-effort: ignore shutdown errors.
        try:
            self.selenium_driver.quit()
        except Exception:
            pass

    def parse(self, response):
        """Save each November-2015 story page and follow pagination links."""
        for url in response.xpath('//h3/a/@href').extract():
            if "/story/2015/11/" in url:
                self.selenium_driver.get(url)
                element = self.selenium_driver.find_element_by_id("showCommentsButton")
                click_n_wait(self.selenium_driver, element)
                filename = url.split('/')[-1]
                # Binary mode + explicit encode writes UTF-8 correctly on both
                # Python 2 and 3; the original used Python-2-only `print >>f`
                # and leaked the file handle on error.
                with open(filename, 'wb') as f:
                    f.write(self.selenium_driver.page_source.encode('utf-8'))

        for url in response.xpath('//a[contains(.,"Next page")]/@href').extract():
            yield scrapy.Request(response.urljoin(url))
