# File lib/scraper/base.rb, line 747
# Runs this scraper over the document and returns result.
#
# Calls prepare with the document, then walks the nodes depth-first using an
# explicit stack. Each element node is matched against a copy of the class's
# rules; every selected element is handed to the rule's extractor, bound to
# this scraper. The walk ends early once @stop is set. Afterwards collect is
# called and result is returned.
#
# Returns nil, without calling collect, when document is neither an Array
# nor an HTML::Node.
747:     def scrape()
748:       # Call prepare with the document, but before doing anything else.
749:       prepare document
750:       # Retrieve the document. This may raise HTTPError or HTMLParseError.
751:       case document # NOTE(review): the branches read @document; presumably document returns it - confirm
752:       when Array
753:         stack = @document.reverse # reversed because nodes are popped from the end (see below)
754:       when HTML::Node
755:         # If a root element is specified, start selecting from there.
756:         # The stack is empty if we can't find any root element (makes
757:         # sense). However, the node we're going to process may be
758:         # a tag, or an HTML::Document.root which is the equivalent of
759:         # a document fragment.
760:         root_element = option(:root_element)
761:         root = root_element ? @document.find(:tag=>root_element) : @document
762:         stack = root ? (root.tag? ? [root] : root.children.reverse) : [] # a tag is processed itself; a non-tag root contributes its children
763:       else
764:         return # neither an Array nor an HTML::Node: nothing to scrape, returns nil
765:       end
766:       # @skip stores all the elements we want to skip (see #skip).
767:       # rules stores all the rules we want to process with this
768:       # scraper, based on the class definition.
769:       @skip = []
770:       @stop = false # checked at the top of every iteration to end the walk early
771:       rules = self.class.rules.clone # work on a copy; delete_if below removes entries from it
772:       begin
773:         # Process the document one node at a time. We process elements
774:         # from the end of the stack, so each time we visit child elements,
775:         # we add them to the end of the stack in reverse order.
776:         while node = stack.pop
777:           break if @stop
778:           skip_this = false
779:           # Only match nodes that are elements, ignore text nodes.
780:           # Also ignore any element that's on the skip list, and if
781:           # found one, remove it from the list (since we never visit
782:           # the same element twice). But an element may be added twice
783:           # to the skip list.
784:           # Note: equal? is faster than == for nodes.
785:           next unless node.tag?
786:           @skip.delete_if { |s| skip_this = true if s.equal?(node) }
787:           next if skip_this
788: 
789:           # Run through all the rules until we process the element or
790:           # run out of rules. If skip_this=true then we processed the
791:           # element and we can break out of the loop. However, we might
792:           # process (and skip) descendants so also watch the skip list.
793:           rules.delete_if do |selector, extractor, rule_name, first_only| # a truthy block value removes the rule (see the block's last expression)
794:             break if skip_this # an earlier rule skipped this node; stop trying rules
795:             # The result of calling match (selected) is nil, element
796:             # or array of elements. We turn it into an array to
797:             # process one element at a time. We process all elements
798:             # that are not on the skip list (we haven't visited
799:             # them yet).
800:             if selected = selector.match(node, first_only)
801:               selected = [selected] unless selected.is_a?(Array)
802:               selected = [selected.first] if first_only
803:               selected.each do |element|
804:                 # Do not process elements we already skipped
805:                 # (see above). However, this time we may visit
806:                 # an element twice, since selected elements may
807:                 # be descendants of the current element on the
808:                 # stack. In rare cases two elements on the stack
809:                 # may pick the same descendants.
810:                 next if @skip.find { |s| s.equal?(element) }
811:                 # Call the extractor method with this element.
812:                 # If it returns true, skip the element and if
813:                 # the current element, don't process any more
814:                 # rules. Again, pay attention to descendants.
815:                 if extractor.bind(self).call(element)
816:                   @extracted = true # records that some extractor returned a truthy value
817:                 end
818:                 if @skip.delete(true) # a literal true in @skip marks "skip this element"; presumably added by #skip - confirm
819:                   if element.equal?(node)
820:                     skip_this = true
821:                   else
822:                     @skip << element # a descendant: remember it so it is skipped when reached
823:                   end
824:                 end
825:               end
826:               first_only if !selected.empty? # block value: drop a first_only rule once its selector has matched
827:             end
828:           end
829: 
830:           # If we did not skip the element, we're going to process its
831:           # children. Reverse order since we're popping from the stack.
832:           if !skip_this && children = node.children
833:             stack.concat children.reverse
834:           end
835:         end
836:       ensure
837:         @skip = nil # always reset, even when the loop above raises
838:       end
839:       collect
840:       return result
841:     end