package stream

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
	"unicode"

	"golang.org/x/net/html/charset"
)

// frame is the per-element state that run keeps on its stack between an
// element's start tag and its matching end tag.
type frame struct {
	// HasNested is true when the token read immediately after the element's
	// start tag was another start tag.
	HasNested bool
	// TotalNested counts the start tags handled while this frame was on top
	// of the stack, i.e. the element's direct child elements.
	TotalNested int
	// AttrCount is the number of attributes on the element's start tag.
	AttrCount int
}

// run pulls tokens from xmlDecoder and streams a JSON-like rendering of them
// to writer.
//
// Output rules:
//   - A start tag writes `"name":`. If it carries attributes, an object is
//     opened, every attribute is written as `"@attr": "value",` and the
//     element's content follows under the `"value":` key.
//   - If the token directly after a start tag is another start tag, the
//     element is treated as a container: its content is wrapped in braces and
//     a `"#count"` entry with the number of child elements is written when the
//     element ends.
//   - Character data is written as a quoted string after trimNonGraphic.
//   - ", " separates an end tag from a following start tag, and two
//     consecutive character-data tokens.
//
// All names and values are JSON-escaped before being written.
//
// The reader parameter is unused; it is kept so existing call sites continue
// to compile. run has no error return: decoding stops at the first nil token
// (io.EOF or a decode error), and output stops at the first failed write.
func run(xmlDecoder *xml.Decoder, reader io.Reader, writer io.Writer) {
	// Buffered output is handed to writer once it grows past this many bytes.
	const flushThreshold = 65535

	var (
		buf     bytes.Buffer
		pending xml.Token // lookahead token read while handling a start tag
		prev    xml.Token // token handled by the previous iteration
		stack   []frame   // one frame per currently open element
	)

	for {
		tok := pending
		pending = nil
		if tok == nil {
			// The error is not reported because run has no error return; a
			// nil token simply ends the loop below.
			tok, _ = xmlDecoder.Token()
		}
		if tok == nil {
			break
		}

		switch t := tok.(type) {
		case xml.StartElement:
			if _, ok := prev.(xml.EndElement); ok {
				buf.WriteString(", ")
			}

			// Snapshot of the enclosing element before this child is counted.
			var parent frame
			if n := len(stack); n > 0 {
				parent = stack[n-1]
			}

			// Peek at the next token to learn whether this element starts
			// with a child element. The peeked token is consumed by the next
			// loop iteration. A nil result (EOF or error) is handled there.
			pending, _ = xmlDecoder.Token()

			cur := frame{AttrCount: len(t.Attr)}
			if _, ok := pending.(xml.StartElement); ok {
				if prev != nil {
					fmt.Fprintf(&buf, "\"%v\":", parent.TotalNested)
				}
				buf.WriteString("{")
				cur.HasNested = true
			}

			writeJSONString(&buf, t.Name.Local)
			buf.WriteString(":")

			if cur.AttrCount > 0 {
				buf.WriteString("{")
				for _, attr := range t.Attr {
					writeJSONString(&buf, "@"+attr.Name.Local)
					buf.WriteString(": ")
					writeJSONString(&buf, attr.Value)
					buf.WriteString(",")
				}
				buf.WriteString("\"value\":")
			}
			if cur.HasNested {
				buf.WriteString("{")
			}

			if n := len(stack); n > 0 {
				stack[n-1].TotalNested++
			}
			stack = append(stack, cur)

		case xml.CharData:
			if _, ok := prev.(xml.CharData); ok {
				buf.WriteString(", ")
			}
			writeJSONString(&buf, trimNonGraphic(string(t)))

		case xml.EndElement:
			var cur frame
			if n := len(stack); n > 0 {
				cur = stack[n-1]
				stack = stack[:n-1]
			}
			// TODO: process this error (unbalanced StartElements and
			// EndElements). With an empty stack cur stays zero-valued and
			// nothing is written for this end tag.

			if cur.HasNested {
				fmt.Fprintf(&buf, ",\"#count\":\"%v\"}", cur.TotalNested)
			}
			if cur.AttrCount > 0 {
				buf.WriteString("}")
			}
			if cur.HasNested {
				buf.WriteString("}")
			}
		}

		if buf.Len() > flushThreshold {
			if _, err := writer.Write(buf.Bytes()); err != nil {
				// The destination is broken; decoding further would only
				// produce output with a hole in it.
				return
			}
			buf.Reset()
		}
		prev = tok
	}

	if buf.Len() > 0 {
		// Final flush. The error is dropped because run has no way to
		// report it and there is nothing left to abort.
		_, _ = writer.Write(buf.Bytes())
	}
}

// writeJSONString appends s to buf as a double-quoted JSON string, escaping
// double quotes, backslashes and control characters below U+0020. Bytes that
// are not valid UTF-8 are written as U+FFFD.
func writeJSONString(buf *bytes.Buffer, s string) {
	buf.WriteByte('"')
	for _, r := range s {
		switch r {
		case '"':
			buf.WriteString(`\"`)
		case '\\':
			buf.WriteString(`\\`)
		case '\n':
			buf.WriteString(`\n`)
		case '\r':
			buf.WriteString(`\r`)
		case '\t':
			buf.WriteString(`\t`)
		default:
			if r < 0x20 {
				fmt.Fprintf(buf, `\u%04x`, r)
			} else {
				buf.WriteRune(r)
			}
		}
	}
	buf.WriteByte('"')
}

// Run decodes the XML document read from reader and writes a JSON-like
// rendering of it to writer. The decoder's CharsetReader is set to
// charset.NewReaderLabel so that documents declaring a non-UTF-8 encoding can
// be read.
//
// Run has no error return: conversion stops silently at the first decode
// error, and whatever was produced up to that point is still written.
func Run(reader io.Reader, writer io.Writer) {
	xmlDecoder := xml.NewDecoder(reader)
	xmlDecoder.CharsetReader = charset.NewReaderLabel
	run(xmlDecoder, reader, writer)
}

// trimNonGraphic returns s with all leading and trailing non-graphic
// characters and spaces removed. It returns "" when s contains no graphic,
// non-space character.
//
// Graphic characters include letters, marks, numbers, punctuation, symbols,
// and spaces, from categories L, M, N, P, S, Zs.
// Spacing characters are set by category Z and property Pattern_White_Space.
//
// Adapted from:
// https://github.com/basgys/goxml2json/blob/5452b6625ea2d3b3133c6b3ace15084d97dcc810/decoder.go
//
// Copyright (c) 2016 Bastien Gysler
func trimNonGraphic(s string) string {
	if s == "" {
		return s
	}

	// Work on runes so the returned bounds never split a multi-byte character.
	runes := []rune(s)
	first, last := -1, -1
	for i, r := range runes {
		if !unicode.IsGraphic(r) || unicode.IsSpace(r) {
			continue
		}
		if first < 0 {
			first = i
		}
		last = i
	}

	// first stays negative when there is nothing worth keeping.
	if first < 0 {
		return ""
	}
	return string(runes[first : last+1])
}