# uri_fetcher.rb revision 3554937a935585f1c6aea3e25895036cf3f74759
require 'net/http'

# Mixin that downloads the content behind a URI over HTTP(S), following
# redirects, and either returns the body or streams it into a file.
module UriFetcher
  # Fetches the content at +uri+, following redirects up to +limit+ hops.
  #
  # A response counts as "actual content" only when it is a 2xx response whose
  # media type is not text/html (see #has_actual_content?). Anything else is
  # treated as a redirect and its Location header is followed.
  #
  # @param uri [String, URI] address to fetch
  # @param limit [Integer] maximum number of requests (initial + redirects)
  # @param write_file [#to_s, nil] when given, the body is streamed into this
  #   path instead of being returned
  # @return [String, Object] the response body, or +write_file+ itself when a
  #   target file was given
  # @raise [ArgumentError] when the redirect budget is exhausted, or when a
  #   response carries neither usable content nor a Location header
  def fetch_uri_content(uri, limit: 10, write_file: nil)
    raise ArgumentError, 'too many HTTP redirects' if limit <= 0

    target = URI(uri)
    result = nil
    redirect_to = nil

    # Net::HTTP.get_response returns the response object, not the block's
    # value, so whatever the block produces has to be captured explicitly.
    Net::HTTP.get_response(target) do |response|
      if has_actual_content?(response)
        # Must happen inside the block: the body can only be streamed while
        # the connection is still open.
        result = produce_response_body(response, write_file)
      else
        redirect_to = redirect_target(target, response)
      end
    end
    return result unless redirect_to

    # Follow the redirect only after the previous connection has been closed.
    fetch_uri_content(redirect_to, limit: limit - 1, write_file: write_file)
  end

  private

  # Returns the response body, or streams it into +write_file+ and returns
  # +write_file+ when one is given.
  #
  # @param response [Net::HTTPResponse] response whose body has not been read
  # @param write_file [#to_s, nil] destination path, if any
  # @return [String, Object] the body, or +write_file+
  def produce_response_body(response, write_file = nil)
    return response.body unless write_file

    # Open without truncating: mode 'w' would wipe the file before the lock is
    # held, clobbering data another lock holder may still be writing.
    File.open(write_file.to_s, File::WRONLY | File::CREAT) do |file|
      file.binmode # downloaded bytes must be written untranslated
      file.flock(File::LOCK_EX)
      file.truncate(0)
      response.read_body { |chunk| file.write(chunk) }
    end
    write_file
  end

  # Tells whether +response+ carries the requested content rather than a
  # redirect, an error, or an HTML page.
  #
  # NOTE(review): HTML responses are rejected even on 2xx, as in the original
  # code — presumably because HTML indicates an interstitial/error page rather
  # than the requested file; confirm against callers.
  #
  # @param response [Net::HTTPResponse]
  # @return [Boolean]
  def has_actual_content?(response)
    # Media types are case-insensitive, hence the downcase.
    response.is_a?(Net::HTTPSuccess) && response.content_type.to_s.downcase != 'text/html'
  end

  # Resolves the Location header of +response+ against the requested URI, so
  # that relative redirects (e.g. "/file.pdf") keep the original host.
  #
  # @param base [URI] the URI that was just requested
  # @param response [Net::HTTPResponse]
  # @return [URI] absolute redirect target
  # @raise [ArgumentError] when the response has no Location header
  def redirect_target(base, response)
    location = response['location']
    if location.nil? || location.empty?
      raise ArgumentError,
            "no usable content and no redirect location (HTTP #{response.code}) for #{base}"
    end

    URI.join(base.to_s, location)
  end
end