xml_decoder.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. package xml
  2. import (
  3. "encoding/xml"
  4. "fmt"
  5. "strings"
  6. )
  // NodeDecoder wraps an xml.Decoder in order to decode a single XML node
  // element together with its nested member elements. It carries the start
  // element of the top-level node being decoded.
  type NodeDecoder struct {
  	// Decoder is the underlying token source.
  	Decoder *xml.Decoder
  	// StartEl is the start element of the node being decoded; its matching
  	// end element marks the end of the node.
  	StartEl xml.StartElement
  }
  14. // WrapNodeDecoder returns an initialized XMLNodeDecoder
  15. func WrapNodeDecoder(decoder *xml.Decoder, startEl xml.StartElement) NodeDecoder {
  16. return NodeDecoder{
  17. Decoder: decoder,
  18. StartEl: startEl,
  19. }
  20. }
  21. // Token on a Node Decoder returns a xml StartElement. It returns a boolean that indicates the
  22. // a token is the node decoder's end node token; and an error which indicates any error
  23. // that occurred while retrieving the start element
  24. func (d NodeDecoder) Token() (t xml.StartElement, done bool, err error) {
  25. for {
  26. token, e := d.Decoder.Token()
  27. if e != nil {
  28. return t, done, e
  29. }
  30. // check if we reach end of the node being decoded
  31. if el, ok := token.(xml.EndElement); ok {
  32. return t, el == d.StartEl.End(), err
  33. }
  34. if t, ok := token.(xml.StartElement); ok {
  35. return restoreAttrNamespaces(t), false, err
  36. }
  37. // skip token if it is a comment or preamble or empty space value due to indentation
  38. // or if it's a value and is not expected
  39. }
  40. }
  // restoreAttrNamespaces rewrites the namespace of each attribute on node from
  // its resolved namespace value back to the short name declared by an
  // xmlns:<short> attribute on that same node.
  //
  // By default, xml.Decoder fully resolves namespaces: for
  // <foo xmlns:bar=baz bar:bin=hi/> the second attribute has Name.Space set to
  // `baz`. This function sets it back to `bar` so it can be identified by its
  // short name later on.
  //
  // Every xmlns:<short> declaration on the node is taken into account. When two
  // declarations share the same namespace value, the first one in attribute
  // order wins. The xmlns declarations themselves, and attributes whose
  // namespace has no declaration on this node, are left unchanged.
  //
  // The entries of node.Attr are updated in place, so the caller's slice
  // observes the rewritten namespaces as well.
  func restoreAttrNamespaces(node xml.StartElement) xml.StartElement {
  	if len(node.Attr) == 0 {
  		return node
  	}

  	// Map each declared namespace value to its short name.
  	shortNames := make(map[string]string)
  	for _, a := range node.Attr {
  		if a.Name.Space != "xmlns" {
  			continue
  		}
  		if _, seen := shortNames[a.Value]; !seen {
  			shortNames[a.Value] = a.Name.Local
  		}
  	}

  	for i := range node.Attr {
  		attr := &node.Attr[i]
  		if attr.Name.Space == "xmlns" {
  			continue
  		}
  		if short, ok := shortNames[attr.Name.Space]; ok {
  			attr.Name.Space = short
  		}
  	}

  	return node
  }
  68. // GetElement looks for the given tag name at the current level, and returns the element if found, and
  69. // skipping over non-matching elements. Returns an error if the node is not found, or if an error occurs while walking
  70. // the document.
  71. func (d NodeDecoder) GetElement(name string) (t xml.StartElement, err error) {
  72. for {
  73. token, done, err := d.Token()
  74. if err != nil {
  75. return t, err
  76. }
  77. if done {
  78. return t, fmt.Errorf("%s node not found", name)
  79. }
  80. switch {
  81. case strings.EqualFold(name, token.Name.Local):
  82. return token, nil
  83. default:
  84. err = d.Decoder.Skip()
  85. if err != nil {
  86. return t, err
  87. }
  88. }
  89. }
  90. }
  91. // Value provides an abstraction to retrieve char data value within an xml element.
  92. // The method will return an error if it encounters a nested xml element instead of char data.
  93. // This method should only be used to retrieve simple type or blob shape values as []byte.
  94. func (d NodeDecoder) Value() (c []byte, err error) {
  95. t, e := d.Decoder.Token()
  96. if e != nil {
  97. return c, e
  98. }
  99. endElement := d.StartEl.End()
  100. switch ev := t.(type) {
  101. case xml.CharData:
  102. c = ev.Copy()
  103. case xml.EndElement: // end tag or self-closing
  104. if ev == endElement {
  105. return []byte{}, err
  106. }
  107. return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
  108. default:
  109. return c, fmt.Errorf("expected value for %v element, got %T type %v instead", d.StartEl.Name.Local, t, t)
  110. }
  111. t, e = d.Decoder.Token()
  112. if e != nil {
  113. return c, e
  114. }
  115. if ev, ok := t.(xml.EndElement); ok {
  116. if ev == endElement {
  117. return c, err
  118. }
  119. }
  120. return c, fmt.Errorf("expected end element %v, got %T type %v instead", endElement, t, t)
  121. }
  // FetchRootElement reads tokens from decoder until it finds a start element
  // and returns it. Tokens that come before it, such as comments and the
  // preamble, are discarded. If the decoder reports an error first (including
  // the end of input), that error is returned with a zero StartElement.
  func FetchRootElement(decoder *xml.Decoder) (startElement xml.StartElement, err error) {
  	for {
  		tok, tokErr := decoder.Token()
  		if tokErr != nil {
  			return xml.StartElement{}, tokErr
  		}

  		if root, isStart := tok.(xml.StartElement); isStart {
  			return root, nil
  		}
  	}
  }