git-subtree-dir: users/wpcarro git-subtree-mainline: 464bbcb15c git-subtree-split: 24f5a642af Change-Id: I6105b3762b79126b3488359c95978cadb3efa789
		
			
				
	
	
		
			98 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			98 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import string
 | |
| from parser import Parser
 | |
| 
 | |
# Integer tag ids of the compact encoding, mapped to the tag / attribute
# names they stand for. Id 0 is deliberately absent: the parser uses 0 as
# the "End" marker.
mapping = dict([
    (1, "family"),
    (2, "person"),
    (3, "firstName"),
    (4, "lastName"),
    (5, "state"),
])
 | |
| 
 | |
def parse_int(i, xs):
    """Read a run of decimal digits from ``xs`` starting at index ``i``.

    Returns a ``(next_index, value)`` pair where ``next_index`` is the index
    of the first character after the digit run and ``value`` is the run
    converted with ``int``.

    Raises ``ValueError`` (from ``int("")``) when ``xs[i]`` is not a digit.
    """
    digits = []
    pos = i
    while pos < len(xs) and xs[pos] in string.digits:
        digits.append(xs[pos])
        pos += 1
    return pos, int("".join(digits))
 | |
| 
 | |
def parse_string(i, xs):
    """Read a text token from ``xs`` starting at index ``i``.

    The token runs up to (not including) the next decimal digit or the end
    of ``xs``, whichever comes first. A single space that separates the text
    from whatever follows is left unconsumed, so ``"McDowell 5"`` yields
    ``"McDowell"`` and the returned index points at the space.

    Returns a ``(next_index, text)`` pair. When ``xs[i]`` is not a digit the
    returned index is always greater than ``i``, so callers make progress.
    """
    end = i
    # Bounds-checked scan: text may legitimately run to the end of input.
    while end < len(xs) and xs[end] not in string.digits:
        end += 1
    # Drop the one separating space, but never the first character itself.
    if end - 1 > i and xs[end - 1] == " ":
        end -= 1
    return end, "".join(xs[i:end])
 | |
| 
 | |
def tokenize(xs):
    """Split the encoded input ``xs`` into a list of tokens.

    Digit runs become ``int`` tokens (via ``parse_int``), text starting with
    an ASCII letter becomes a ``str`` token (via ``parse_string``), and
    whitespace between tokens is skipped.

    Raises ``ValueError`` for a character that cannot start any token, or
    if a sub-parser fails to advance; either case would otherwise leave the
    scan index unchanged and loop forever.
    """
    result = []
    i = 0
    while i < len(xs):
        ch = xs[i]
        if ch.isspace():
            i += 1
            continue
        if ch in string.digits:
            next_i, token = parse_int(i, xs)
        elif ch in string.ascii_letters:
            next_i, token = parse_string(i, xs)
        else:
            raise ValueError(
                "unexpected character {!r} at index {}".format(ch, i))
        if next_i <= i:
            # Guard against a sub-parser that consumed nothing.
            raise ValueError(
                "tokenizer made no progress at index {}".format(i))
        result.append(token)
        i = next_i
    return result
 | |
| 
 | |
def parse(xs):
    """Tokenize ``xs`` and parse one element from the resulting tokens.

    The tokens are wrapped in a ``Parser`` and handed to ``parse_element``;
    whatever ``parse_element`` returns is returned unchanged.
    """
    return parse_element(Parser(tokenize(xs)))
 | |
| 
 | |
# Grammar as implemented by parse_element / parse_attrs below:
#
# Element   -> Tag Attribute* End Child* End ;
# Child     -> Element | STRING ;
# Tag       -> INTEGER ;
# Value     -> STRING ;
# Attribute -> Tag Value ;
# End       -> 0 ;
 | |
| 
 | |
def parse_element(parser):
    """Parse one node from ``parser`` and return it.

    A string token is a text node: it is consumed and returned as-is.
    Otherwise the node is an element, returned as the three-item list
    ``[tag, attrs, children]`` where ``tag`` is looked up in ``mapping``,
    ``attrs`` comes from ``parse_attrs`` and ``children`` is a list of
    recursively parsed nodes.

    Raises ``KeyError`` if the tag id is not in ``mapping``.
    NOTE(review): ``expect`` / ``expect_predicate`` are defined in the
    ``parser`` module (not shown here); presumably they raise on a mismatch.
    """
    # Text node: nothing to structure, hand the token back.
    if isinstance(parser.curr(), str):
        return parser.consume()

    tag_id = parser.expect_predicate(lambda x: isinstance(x, int))
    tag = mapping[tag_id]
    attrs = parse_attrs(parser)
    parser.expect([0])  # End marker closing the attribute list

    children = []
    while not parser.exhausted() and parser.curr() != 0:
        children.append(parse_element(parser))
    parser.expect([0])  # End marker closing the element
    return [tag, attrs, children]
 | |
| 
 | |
def parse_attrs(parser):
    """Parse attributes up to (not including) the next ``0`` token.

    Each attribute is an integer tag id, looked up in ``mapping``, followed
    by one token that is consumed as its value. Returns a list of
    ``(name, value)`` tuples in input order.

    The loop also stops when the parser is exhausted, mirroring the child
    loop in ``parse_element``, so truncated input is left for the caller's
    ``expect([0])`` to report instead of being scanned past the end.

    Raises ``KeyError`` if a tag id is not in ``mapping``.
    """
    result = []
    while not parser.exhausted() and parser.curr() != 0:
        tag_id = parser.expect_predicate(lambda x: isinstance(x, int))
        tag = mapping[tag_id]
        value = parser.consume()
        result.append((tag, value))
    return result
 | |
| 
 | |
def stringify_xml(tree, indent=0):
    """Render a parsed tree as indented XML-like text.

    ``tree`` is either a string (a text node, returned unchanged) or a
    three-item sequence ``[tag, attrs, children]`` where ``attrs`` is an
    iterable of ``(name, value)`` pairs and ``children`` is a list of
    further trees. ``indent`` is the nesting depth of ``tree``; each level
    is two spaces.

    Every child, text or element, is placed exactly one level deeper than
    its parent, and an element without children renders as an opening line
    immediately followed by its closing line.
    """
    if isinstance(tree, str):
        return tree

    tag, attrs, children = tree
    pad = "  " * indent
    child_pad = "  " * (indent + 1)

    attr_text = "".join(" {}=\"{}\"".format(k, v) for k, v in attrs)
    lines = ["{}<{}{}>".format(pad, tag, attr_text)]
    for child in children:
        rendered = stringify_xml(child, indent + 1)
        # Elements indent themselves; bare text comes back unindented.
        if isinstance(child, str):
            rendered = child_pad + rendered
        lines.append(rendered)
    lines.append("{}</{}>".format(pad, tag))
    return "\n".join(lines)
 | |
| 
 | |
# Demo run: show the raw input, its tokens, the parsed tree and the
# rendered XML for one sample message. Each value is computed only when its
# line is about to be printed, so earlier lines still appear if a later
# stage fails.
x = "1 4 McDowell 5 CA 0 2 3 Gayle 0 Some Message 0 0"
for template, produce in (
    ("Input:   {}", lambda s: s),
    ("Tokens:  {}", tokenize),
    ("Parsed:  {}", parse),
    ("{}", lambda s: stringify_xml(parse(s))),
):
    print(template.format(produce(x)))
 |