stream.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. package stream
  2. import (
  3. "bytes"
  4. "encoding/xml"
  5. "fmt"
  6. "io"
  7. "unicode"
  8. "golang.org/x/net/html/charset"
  9. )
  10. type frame struct {
  11. HasNested bool
  12. TotalNested int
  13. AttrCount int
  14. }
  15. func run(xmlDecoder *xml.Decoder, reader io.Reader, writer io.Writer) {
  16. writeBuffer := bytes.NewBufferString("")
  17. var tokenToProcess interface{}
  18. stack := []frame{}
  19. stackSize := 0
  20. var prevToken interface{}
  21. for {
  22. tokenI := tokenToProcess
  23. tokenToProcess = nil
  24. if tokenI == nil {
  25. tokenI, _ = xmlDecoder.Token()
  26. }
  27. if tokenI == nil {
  28. break
  29. }
  30. switch token := tokenI.(type) {
  31. case xml.StartElement:
  32. switch prevToken.(type) {
  33. case xml.EndElement:
  34. writeBuffer.WriteString(", ")
  35. }
  36. parent := frame{}
  37. frame := frame{}
  38. if stackSize > 0 {
  39. parent = stack[stackSize-1]
  40. }
  41. tokenToProcess,_ = xmlDecoder.Token()
  42. switch tokenToProcess.(type) {
  43. case xml.StartElement:
  44. if prevToken != nil {
  45. writeBuffer.WriteString(fmt.Sprintf("\"%v\":", parent.TotalNested))
  46. }
  47. writeBuffer.WriteString("{")
  48. frame.HasNested = true
  49. }
  50. writeBuffer.WriteString("\""+token.Name.Local+"\":")
  51. frame.AttrCount = len(token.Attr)
  52. if frame.AttrCount > 0 {
  53. writeBuffer.WriteString("{")
  54. }
  55. for _, attr := range token.Attr {
  56. writeBuffer.WriteString("\"@"+attr.Name.Local+"\": \""+attr.Value+"\",")
  57. }
  58. if frame.AttrCount > 0 {
  59. writeBuffer.WriteString("\"value\":")
  60. }
  61. if frame.HasNested {
  62. writeBuffer.WriteString("{")
  63. }
  64. if stackSize > 0 {
  65. parent.TotalNested++
  66. stack[stackSize-1] = parent
  67. }
  68. stack = append(stack[:stackSize], frame)
  69. stackSize++
  70. case xml.CharData:
  71. switch prevToken.(type) {
  72. case xml.CharData:
  73. writeBuffer.WriteString(", ")
  74. }
  75. writeBuffer.WriteString("\""+trimNonGraphic(string(xml.CharData(token)))+"\"")
  76. case xml.EndElement:
  77. var frame frame
  78. if stackSize > 0 {
  79. stackSize--
  80. frame = stack[stackSize]
  81. } else {
  82. // TODO: process this error (unbalanced StartElements and EndElements)
  83. }
  84. if frame.HasNested {
  85. writeBuffer.WriteString(fmt.Sprintf(",\"#count\":\"%v\"}", frame.TotalNested))
  86. }
  87. if frame.AttrCount > 0 {
  88. writeBuffer.WriteString("}")
  89. }
  90. if frame.HasNested {
  91. writeBuffer.WriteString("}")
  92. }
  93. }
  94. if writeBuffer.Len() > 65535 {
  95. writer.Write(writeBuffer.Bytes())
  96. writeBuffer.Reset()
  97. }
  98. prevToken = tokenI
  99. }
  100. if writeBuffer.Len() > 0 {
  101. writer.Write(writeBuffer.Bytes())
  102. writeBuffer.Reset()
  103. }
  104. }
  105. func Run(reader io.Reader, writer io.Writer) {
  106. xmlDecoder := xml.NewDecoder(reader)
  107. xmlDecoder.CharsetReader = charset.NewReaderLabel
  108. run(xmlDecoder, reader, writer)
  109. return
  110. }
  111. // about trimNonGraphic():
  112. // Copied from: https://github.com/basgys/goxml2json/blob/5452b6625ea2d3b3133c6b3ace15084d97dcc810/decoder.go
  113. //
  114. // Copyright (c) 2016 Bastien Gysler
  115. //
  116. // trimNonGraphic returns a slice of the string s, with all leading and trailing
  117. // non graphic characters and spaces removed.
  118. //
  119. // Graphic characters include letters, marks, numbers, punctuation, symbols,
  120. // and spaces, from categories L, M, N, P, S, Zs.
  121. // Spacing characters are set by category Z and property Pattern_White_Space.
  122. func trimNonGraphic(s string) string {
  123. if s == "" {
  124. return s
  125. }
  126. var first *int
  127. var last int
  128. for i, r := range []rune(s) {
  129. if !unicode.IsGraphic(r) || unicode.IsSpace(r) {
  130. continue
  131. }
  132. if first == nil {
  133. f := i // copy i
  134. first = &f
  135. last = i
  136. } else {
  137. last = i
  138. }
  139. }
  140. // If first is nil, it means there are no graphic characters
  141. if first == nil {
  142. return ""
  143. }
  144. return string([]rune(s)[*first : last+1])
  145. }