require 'cgi'
require 'nokogiri'
require 'html2haml/html/erb'
# Haml monkeypatches various Nokogiri classes
# to add methods for conversion to Haml.
# @private
module Nokogiri
  module XML
    # Base node class, reopened to add the Haml-conversion helpers that are
    # shared by the node types converted in this file.
    #
    # @see Nokogiri
    class Node
      # Whether this node has already been converted to Haml.
      # Only used for text nodes and elements.
      #
      # @return [Boolean]
      attr_accessor :converted_to_haml

      # Returns the Haml representation of the given node.
      #
      # The node's own source text is emitted with any literal `#{` escaped.
      # Unless that text ends in a newline, directly following `haml_loud`
      # siblings are folded into the same text as `#{...}` interpolation.
      #
      # @param tabs [Integer] The indentation level of the resulting Haml.
      # @option options (see Html2haml::HTML#initialize)
      # @return [String] the Haml source, or "" when the node was already
      #   converted or its text is blank
      def to_haml(tabs, options)
        return "" if converted_to_haml || to_s.strip.empty?

        text = uninterp(to_s)
        # Ending in a newline stops the inline nodes.
        text << process_inline_nodes(next_sibling) unless text.end_with?("\n")
        parse_text_with_interpolation(text, tabs)
      end

      private

      # Rewrites `<haml_loud>` elements embedded in +text+ as `#{...}`
      # interpolation. Returns +text+ untouched unless `options[:erb]` is set.
      #
      # @param text [String]
      # @param options [Hash] only the `:erb` key is read
      # @return [String]
      def erb_to_interpolation(text, options)
        return text unless options[:erb]

        text = CGI.escapeHTML(uninterp(text))
        # Everything was escaped above; restore only the haml_loud tags so they
        # are the sole markup the fragment parser sees as elements. Without
        # this the "element" branch below can never be reached.
        %w[<haml_loud> </haml_loud>].each { |tag| text.gsub!(CGI.escapeHTML(tag), tag) }
        ::Nokogiri::XML.fragment(text).children.inject("") do |str, elem|
          if elem.is_a?(::Nokogiri::XML::Text)
            str + CGI.unescapeHTML(elem.to_s)
          else # element
            str + '#{' + CGI.unescapeHTML(elem.inner_text.strip) + '}'
          end
        end
      end

      # @param tabs [Integer] indentation level
      # @return [String] the indentation prefix for +tabs+ levels
      def tabulate(tabs)
        ' ' * tabs
      end

      # Escapes literal `#{` sequences so they are not treated as
      # interpolation in the generated Haml.
      #
      # @param text [String]
      # @return [String] a new string; +text+ is not modified
      def uninterp(text)
        text.gsub('#{', '\#{') #'
      end

      # @return [Hash{String => String}] attribute names mapped to their
      #   stringified values
      def attr_hash
        Hash[attributes.map { |k, v| [k.to_s, v.to_s] }]
      end

      # Like {#parse_text_with_interpolation}, but escapes `#{` in +text+ first.
      def parse_text(text, tabs)
        parse_text_with_interpolation(uninterp(text), tabs)
      end

      # Emits one indented Haml line per line of +text+, prefixing a backslash
      # to any line whose first character is one of
      # Haml::Parser::SPECIAL_CHARACTERS.
      #
      # The argument is left unmodified, so frozen strings are accepted.
      #
      # @param text [String]
      # @param tabs [Integer] indentation level
      # @return [String] "" when +text+ is blank
      def parse_text_with_interpolation(text, tabs)
        stripped = text.strip
        return "" if stripped.empty?

        stripped.split("\n").map do |line|
          line = line.strip
          escape = '\\' if Haml::Parser::SPECIAL_CHARACTERS.include?(line[0])
          "#{tabulate(tabs)}#{escape}#{line}\n"
        end.join
      end

      # Collects a run of `haml_loud` elements starting at +node+, together with
      # the text node directly following each of them, into one string. Each
      # element becomes `#{...}` with newlines in its content collapsed to a
      # single space. Every consumed node is flagged as converted so it is not
      # emitted a second time.
      #
      # @param node [Nokogiri::XML::Node, nil] first candidate sibling
      # @return [String] "" when +node+ is not a `haml_loud` element
      def process_inline_nodes(node)
        text = ""
        while node.is_a?(::Nokogiri::XML::Element) && node.name == "haml_loud"
          node.converted_to_haml = true
          text << '#{' <<
            CGI.unescapeHTML(node.inner_text).gsub(/\n\s*/, ' ').strip << '}'

          if node.next_sibling.is_a?(::Nokogiri::XML::Text)
            node = node.next_sibling
            text << uninterp(node.to_s)
            node.converted_to_haml = true
          end
          node = node.next_sibling
        end
        text
      end
    end
  end
end
# Names of the synthetic `haml_*` elements that the converter handles
# specially. Frozen so the shared list cannot be modified by accident.
# @private
HAML_TAGS = %w[haml_block haml_loud haml_silent].freeze
#
# HAML_TAGS.each do |t|
# Nokogiri::XML::ElementContent[t] = {}
# Nokogiri::XML::ElementContent.keys.each do |key|
# Nokogiri::XML::ElementContent[t][key.hash] = true
# end
# end
#
# Nokogiri::XML::ElementContent.keys.each do |k|
# HAML_TAGS.each do |el|
# val = Nokogiri::XML::ElementContent[k]
# val[el.hash] = true if val.is_a?(Hash)
# end
# end
module Html2haml
# Converts HTML documents into Haml templates.
# Depends on [Nokogiri](http://nokogiri.org/) for HTML parsing.
# If ERB conversion is being used, also depends on
# [Erubis](http://www.kuwata-lab.com/erubis) to parse the ERB
# and [ruby_parser](http://parsetree.rubyforge.org/) to parse the Ruby code.
#
# Example usage:
#
# HTML.new("<a href='http://google.com'>Blat</a>").render
# #=> "%a{:href => 'http://google.com'} Blat"
class HTML
# @param template [String, Nokogiri::Node] The HTML template to convert
# @option options :erb [Boolean] (false) Whether or not to parse
# ERB's `<%= %>` and `<% %>` into Haml's `=` and `-`
# @option options :xhtml [Boolean] (false) Whether or not to parse
# the HTML strictly as XHTML
def initialize(template, options = {})
  # +template+ may be an already-parsed Nokogiri node, a String of HTML, or
  # any object that responds to #read and yields such a String.
  @options = options

  # An already-parsed node is stored as-is; no encoding check or ERB
  # compilation is applied to it.
  if template.is_a?(Nokogiri::XML::Node)
    @template = template
    return
  end

  # Duck-type on #read rather than checking for IO: StringIO and Tempfile are
  # not IO subclasses but are just as readable.
  template = template.read if template.respond_to?(:read)
  template = Haml::Util.check_encoding(template) { |msg, line| raise Haml::Error.new(msg, line) }

  if @options[:erb]
    # `require` is idempotent; this guarantees the ERB compiler is loaded
    # before it is used.
    require 'html2haml/html/erb'
    template = ERB.compile(template)
  end

  @template = detect_proper_parser(template)
end
def detect_proper_parser(template)
if template =~ /^\s*\n/, "")
end
end
# @see Nokogiri
# @private
class ::Nokogiri::XML::DTD
  # Renders this doctype as a Haml `!!!` line.
  #
  # The type, version and strictness are taken from the `DTD ... //` part of
  # the external id. Version "1.0" and strictness "transitional" are treated
  # as defaults and omitted; an "html" type drops its version and falls back
  # to "strict" when no strictness is given.
  #
  # @param tabs [Integer] indentation level of the emitted line
  # @param options [Hash] not used by this method
  # @return [String]
  # @raise [Haml::SyntaxError] if the external id has no recognisable DTD part
  # @see Html2haml::HTML::Node#to_haml
  def to_haml(tabs, options)
    parts =
      if external_id.nil?
        ["", "", ""]
      else
        external_id.scan(/DTD\s+([^\s]+)\s*([^\s]*)\s*([^\s]*)\s*\/\//)[0]
      end
    raise Haml::SyntaxError.new("Invalid doctype") if parts.nil?

    type, version, strictness = parts.map(&:downcase)

    if type == "html"
      version = ""
      strictness = "strict" if strictness == ""
    end

    # Default values are not spelled out in the Haml doctype.
    version = nil if version == "1.0" || version.empty?
    strictness = nil if strictness == 'transitional' || strictness.empty?

    version_part = version ? " #{version.capitalize}" : nil
    strictness_part = strictness ? " #{strictness.capitalize}" : nil

    "#{tabulate(tabs)}!!!#{version_part}#{strictness_part}\n"
  end
end
# @see Nokogiri
# @private
class ::Nokogiri::XML::Comment
# @see Html2haml::HTML::Node#to_haml
def to_haml(tabs, options)
content = self.content
if content =~ /\A(\[[^\]]+\])>(.*) 1 # Multiline script block
# Normalize the indentation so that the last line is the base
indent_str = lines.last[/^[ \t]*/]
indent_re = /^[ \t]{0,#{indent_str.count(" ") + 8 * indent_str.count("\t")}}/
lines.map! {|s| s.gsub!(indent_re, '')}
# Add an extra " " to make it indented relative to "= "
lines[1..-1].each {|s| s.gsub!(/^/, " ")}
# Add | at the end, properly aligned
length = lines.map {|s| s.size}.max + 1
lines.map! {|s| "%#{-length}s|" % s}
if next_sibling && next_sibling.is_a?(Nokogiri::XML::Element) && next_sibling.name == "haml_loud" &&
next_sibling.inner_text.split("\n").reject {|s| s.strip.empty?}.size > 1
lines << "-#"
end
end
return lines.map {|s| output + s + "\n"}.join
when "haml_silent"
return CGI.unescapeHTML(inner_text).split("\n").map do |line|
next "" if line.strip.empty?
"#{output}- #{line.strip}\n"
end.join
when "haml_block"
return render_children("", tabs, options)
end
end
if self.next && self.next.text? && self.next.content =~ /\A[^\s]/
if self.previous.nil? || self.previous.text? &&
(self.previous.content =~ /[^\s]\Z/ ||
self.previous.content =~ /\A\s*\Z/ && self.previous.previous.nil?)
nuke_outer_whitespace = true
else
output << "= succeed #{self.next.content.slice!(/\A[^\s]+/).dump} do\n"
tabs += 1
output << tabulate(tabs)
#empty the text node since it was inserted into the block
self.next.content = ""
end
end
output << "%#{name}" unless name.to_s == 'div' &&
(static_id?(options) ||
static_classname?(options) &&
attr_hash['class'].to_s.split(' ').any?(&method(:haml_css_attr?)))
if attr_hash
if static_id?(options)
output << "##{attr_hash['id'].to_s}"
remove_attribute('id')
end
if static_classname?(options)
leftover = attr_hash['class'].to_s.split(' ').reject do |c|
next unless haml_css_attr?(c)
output << ".#{c}"
end
remove_attribute('class')
set_attribute('class', leftover.join(' ')) unless leftover.empty?
end
output << haml_attributes(options) if attr_hash.length > 0
end
output << ">" if nuke_outer_whitespace
output << "/" if to_xhtml.end_with?("/>")
if children && children.size == 1
child = children.first
if child.is_a?(::Nokogiri::XML::Text)
if !child.to_s.include?("\n")
text = child.to_haml(tabs + 1, options)
return output + " " + text.lstrip.gsub(/^\\/, '') unless text.chomp.include?("\n") || text.empty?
return output + "\n" + text
elsif ["pre", "textarea"].include?(name) ||
(name == "code" && parent.is_a?(::Nokogiri::XML::Element) && parent.name == "pre")
return output + "\n#{tabulate(tabs + 1)}:preserve\n" +
inner_text.gsub(/^/, tabulate(tabs + 2))
end
elsif child.is_a?(::Nokogiri::XML::Element) && child.name == "haml_loud"
return output + child.to_haml(tabs + 1, options).lstrip
end
end
render_children(output + "\n", tabs, options)
end
private
# Appends the Haml for every child node, rendered one indentation level
# deeper than +tabs+, to +so_far+.
#
# @param so_far [String] Haml produced so far; it is not mutated
# @param tabs [Integer] indentation level of the current node
# @param options [Hash] passed through to each child's #to_haml
# @return [String] +so_far+ followed by the children's Haml
def render_children(so_far, tabs, options)
  rendered = so_far
  (children || []).each do |node|
    rendered = rendered + node.to_haml(tabs + 1, options)
  end
  rendered
end
def dynamic_attributes
#reject any attrs without
@dynamic_attributes = attr_hash.select {|name, value| value =~ %r{ #{value}"
end
if options[:ruby19_style_attributes]
return "#{name}: #{value}"
end
":#{name} => #{value}"
end
end
end
end