require 'cgi' require 'nokogiri' require 'html2haml/html/erb' # Haml monkeypatches various Nokogiri classes # to add methods for conversion to Haml. # @private module Nokogiri module XML # @see Nokogiri class Node # Whether this node has already been converted to Haml. # Only used for text nodes and elements. # # @return [Boolean] attr_accessor :converted_to_haml # Returns the Haml representation of the given node. # # @param tabs [Fixnum] The indentation level of the resulting Haml. # @option options (see Html2haml::HTML#initialize) def to_haml(tabs, options) return "" if converted_to_haml || to_s.strip.empty? text = uninterp(self.to_s) #ending in a newline stops the inline nodes if text.end_with?("\n") parse_text_with_interpolation(text, tabs) else text << process_inline_nodes(next_sibling) parse_text_with_interpolation(text, tabs) end end private def erb_to_interpolation(text, options) return text unless options[:erb] text = CGI.escapeHTML(uninterp(text)) %w[ ].each {|str| text.gsub!(CGI.escapeHTML(str), str)} ::Nokogiri::XML.fragment(text).children.inject("") do |str, elem| if elem.is_a?(::Nokogiri::XML::Text) str + CGI.unescapeHTML(elem.to_s) else # element str + '#{' + CGI.unescapeHTML(elem.inner_text.strip) + '}' end end end def tabulate(tabs) ' ' * tabs end def uninterp(text) text.gsub('#{', '\#{') #' end def attr_hash Hash[attributes.map {|k, v| [k.to_s, v.to_s]}] end def parse_text(text, tabs) parse_text_with_interpolation(uninterp(text), tabs) end def parse_text_with_interpolation(text, tabs) text.strip! return "" if text.empty? text.split("\n").map do |line| line.strip! "#{tabulate(tabs)}#{'\\' if Haml::Parser::SPECIAL_CHARACTERS.include?(line[0])}#{line}\n" end.join end def process_inline_nodes(node) text = "" while node.is_a?(::Nokogiri::XML::Element) && node.name == "haml_loud" node.converted_to_haml = true text << '#{' << CGI.unescapeHTML(node.inner_text).gsub(/\n\s*/, ' ').strip << '}' if node.next_sibling.is_a?(::Nokogiri::XML::Text) node = node.next_sibling text << uninterp(node.to_s) node.converted_to_haml = true end node = node.next_sibling end text end end end end # @private HAML_TAGS = %w[haml_block haml_loud haml_silent] # # HAML_TAGS.each do |t| # Nokogiri::XML::ElementContent[t] = {} # Nokogiri::XML::ElementContent.keys.each do |key| # Nokogiri::XML::ElementContent[t][key.hash] = true # end # end # # Nokogiri::XML::ElementContent.keys.each do |k| # HAML_TAGS.each do |el| # val = Nokogiri::XML::ElementContent[k] # val[el.hash] = true if val.is_a?(Hash) # end # end module Html2haml # Converts HTML documents into Haml templates. # Depends on [Nokogiri](http://nokogiri.org/) for HTML parsing. # If ERB conversion is being used, also depends on # [Erubis](http://www.kuwata-lab.com/erubis) to parse the ERB # and [ruby_parser](http://parsetree.rubyforge.org/) to parse the Ruby code. # # Example usage: # # HTML.new("Blat").render # #=> "%a{:href => 'http://google.com'} Blat" class HTML # @param template [String, Nokogiri::Node] The HTML template to convert # @option options :erb [Boolean] (false) Whether or not to parse # ERB's `<%= %>` and `<% %>` into Haml's `=` and `-` # @option options :xhtml [Boolean] (false) Whether or not to parse # the HTML strictly as XHTML def initialize(template, options = {}) @options = options if template.is_a? Nokogiri::XML::Node @template = template else if template.is_a? IO template = template.read end template = Haml::Util.check_encoding(template) {|msg, line| raise Haml::Error.new(msg, line)} if @options[:erb] require 'html2haml/html/erb' template = ERB.compile(template) end @template = detect_proper_parser(template) end end def detect_proper_parser(template) if template =~ /^\s*\n/, "") end end # @see Nokogiri # @private class ::Nokogiri::XML::DTD # @see Html2haml::HTML::Node#to_haml def to_haml(tabs, options) attrs = external_id.nil? ? ["", "", ""] : external_id.scan(/DTD\s+([^\s]+)\s*([^\s]*)\s*([^\s]*)\s*\/\//)[0] raise Haml::SyntaxError.new("Invalid doctype") if attrs == nil type, version, strictness = attrs.map { |a| a.downcase } if type == "html" version = "" strictness = "strict" if strictness == "" end if version == "1.0" || version.empty? version = nil end if strictness == 'transitional' || strictness.empty? strictness = nil end version = " #{version.capitalize}" if version strictness = " #{strictness.capitalize}" if strictness "#{tabulate(tabs)}!!!#{version}#{strictness}\n" end end # @see Nokogiri # @private class ::Nokogiri::XML::Comment # @see Html2haml::HTML::Node#to_haml def to_haml(tabs, options) content = self.content if content =~ /\A(\[[^\]]+\])>(.*) 1 # Multiline script block # Normalize the indentation so that the last line is the base indent_str = lines.last[/^[ \t]*/] indent_re = /^[ \t]{0,#{indent_str.count(" ") + 8 * indent_str.count("\t")}}/ lines.map! {|s| s.gsub!(indent_re, '')} # Add an extra " " to make it indented relative to "= " lines[1..-1].each {|s| s.gsub!(/^/, " ")} # Add | at the end, properly aligned length = lines.map {|s| s.size}.max + 1 lines.map! {|s| "%#{-length}s|" % s} if next_sibling && next_sibling.is_a?(Nokogiri::XML::Element) && next_sibling.name == "haml_loud" && next_sibling.inner_text.split("\n").reject {|s| s.strip.empty?}.size > 1 lines << "-#" end end return lines.map {|s| output + s + "\n"}.join when "haml_silent" return CGI.unescapeHTML(inner_text).split("\n").map do |line| next "" if line.strip.empty? "#{output}- #{line.strip}\n" end.join when "haml_block" return render_children("", tabs, options) end end if self.next && self.next.text? && self.next.content =~ /\A[^\s]/ if self.previous.nil? || self.previous.text? && (self.previous.content =~ /[^\s]\Z/ || self.previous.content =~ /\A\s*\Z/ && self.previous.previous.nil?) nuke_outer_whitespace = true else output << "= succeed #{self.next.content.slice!(/\A[^\s]+/).dump} do\n" tabs += 1 output << tabulate(tabs) #empty the text node since it was inserted into the block self.next.content = "" end end output << "%#{name}" unless name.to_s == 'div' && (static_id?(options) || static_classname?(options) && attr_hash['class'].to_s.split(' ').any?(&method(:haml_css_attr?))) if attr_hash if static_id?(options) output << "##{attr_hash['id'].to_s}" remove_attribute('id') end if static_classname?(options) leftover = attr_hash['class'].to_s.split(' ').reject do |c| next unless haml_css_attr?(c) output << ".#{c}" end remove_attribute('class') set_attribute('class', leftover.join(' ')) unless leftover.empty? end output << haml_attributes(options) if attr_hash.length > 0 end output << ">" if nuke_outer_whitespace output << "/" if to_xhtml.end_with?("/>") if children && children.size == 1 child = children.first if child.is_a?(::Nokogiri::XML::Text) if !child.to_s.include?("\n") text = child.to_haml(tabs + 1, options) return output + " " + text.lstrip.gsub(/^\\/, '') unless text.chomp.include?("\n") || text.empty? return output + "\n" + text elsif ["pre", "textarea"].include?(name) || (name == "code" && parent.is_a?(::Nokogiri::XML::Element) && parent.name == "pre") return output + "\n#{tabulate(tabs + 1)}:preserve\n" + inner_text.gsub(/^/, tabulate(tabs + 2)) end elsif child.is_a?(::Nokogiri::XML::Element) && child.name == "haml_loud" return output + child.to_haml(tabs + 1, options).lstrip end end render_children(output + "\n", tabs, options) end private def render_children(so_far, tabs, options) (self.children || []).inject(so_far) do |output, child| output + child.to_haml(tabs + 1, options) end end def dynamic_attributes #reject any attrs without @dynamic_attributes = attr_hash.select {|name, value| value =~ %r{ #{value}" end if options[:ruby19_style_attributes] return "#{name}: #{value}" end ":#{name} => #{value}" end end end end