| 1 | /* |
|---|
| 2 | * Gaycko Text mode web browser |
|---|
| 3 | * Copyright (c) 2011 Jean-Yves Lamoureux <jylam@lnxscene.org> |
|---|
| 4 | * All Rights Reserved |
|---|
| 5 | * |
|---|
| 6 | * This library is free software. It comes without any warranty, to |
|---|
| 7 | * the extent permitted by applicable law. You can redistribute it |
|---|
| 8 | * and/or modify it under the terms of the Do What The Fuck You Want |
|---|
| 9 | * To Public License, Version 2, as published by Sam Hocevar. See |
|---|
| 10 | * http://sam.zoy.org/wtfpl/COPYING for more details. |
|---|
| 11 | */ |
|---|
| 12 | #include <config.h> |
|---|
| 13 | #include <stdio.h> |
|---|
| 14 | #include <string.h> |
|---|
| 15 | #include "dom.h" |
|---|
| 16 | #include "helpers/str.h" |
|---|
| 17 | |
|---|
| 18 | gDOM *gaycko_convert_dom(htmlDocPtr doc) { |
|---|
| 19 | |
|---|
| 20 | gDOM *dom = malloc(sizeof(gDOM)); |
|---|
| 21 | dom->root = NULL; |
|---|
| 22 | htmlNodePtr root = xmlDocGetRootElement(doc); |
|---|
| 23 | gNode **r = NULL; |
|---|
| 24 | gNode *ret = NULL; |
|---|
| 25 | |
|---|
| 26 | if(root != NULL) |
|---|
| 27 | { |
|---|
| 28 | ret = explore(r, root, 0, NULL); |
|---|
| 29 | } |
|---|
| 30 | |
|---|
| 31 | dom->root = ret; |
|---|
| 32 | |
|---|
| 33 | return dom; |
|---|
| 34 | } |
|---|
| 35 | |
|---|
| 36 | |
|---|
| 37 | gNode *add_node(htmlNodePtr node) { |
|---|
| 38 | |
|---|
| 39 | gNode *e; |
|---|
| 40 | e = malloc(sizeof(gNode)); |
|---|
| 41 | e->children_count = 0; |
|---|
| 42 | e->children = NULL; |
|---|
| 43 | e->attribute_count = 0; |
|---|
| 44 | e->attributes = NULL; |
|---|
| 45 | e->text = NULL; |
|---|
| 46 | |
|---|
| 47 | e->properties = malloc(sizeof(gProperty)); |
|---|
| 48 | e->properties->x = 0; |
|---|
| 49 | e->properties->y = 0; |
|---|
| 50 | e->properties->width = 0; |
|---|
| 51 | e->properties->height = 0; |
|---|
| 52 | |
|---|
| 53 | if(!node->name) { |
|---|
| 54 | e->name = strdup("UNKNOW"); |
|---|
| 55 | e->type = ELEM_UNKNOW; |
|---|
| 56 | return e; |
|---|
| 57 | } |
|---|
| 58 | |
|---|
| 59 | e->name = (char*)strdup((const char*)node->name); |
|---|
| 60 | if(!strncmp(e->name, "html", 4)) { |
|---|
| 61 | e->type = ELEM_HTML; |
|---|
| 62 | }else if(!strncmp(e->name, "head", 4)) { |
|---|
| 63 | e->type = ELEM_HEAD; |
|---|
| 64 | }else if(!strncmp(e->name, "title", 5)) { |
|---|
| 65 | e->type = ELEM_TITLE; |
|---|
| 66 | }else if(!strncmp(e->name, "meta", 4)) { |
|---|
| 67 | e->type = ELEM_META; |
|---|
| 68 | }else if(!strncmp(e->name, "body", 4)) { |
|---|
| 69 | e->type = ELEM_BODY; |
|---|
| 70 | }else if(!strncmp(e->name, "h1", 2)) { |
|---|
| 71 | e->type = ELEM_H1; |
|---|
| 72 | }else if(!strncmp(e->name, "h2", 2)) { |
|---|
| 73 | e->type = ELEM_H2; |
|---|
| 74 | }else if(!strncmp(e->name, "h3", 2)) { |
|---|
| 75 | e->type = ELEM_H3; |
|---|
| 76 | }else if(!strncmp(e->name, "hr", 2)) { |
|---|
| 77 | e->type = ELEM_HR; |
|---|
| 78 | }else if(!strncmp(e->name, "br", 2) || !strncmp(e->name, "br ", 3)) { |
|---|
| 79 | e->type = ELEM_BR; |
|---|
| 80 | }else if(!strncmp(e->name, "p", 1)) { |
|---|
| 81 | e->type = ELEM_P; |
|---|
| 82 | }else if(!strncmp(e->name, "img", 3)) { |
|---|
| 83 | e->type = ELEM_IMG; |
|---|
| 84 | }else if(!strncmp(e->name, "table", 3)) { |
|---|
| 85 | e->type = ELEM_TABLE; |
|---|
| 86 | }else if(!strncmp(e->name, "tr", 3)) { |
|---|
| 87 | e->type = ELEM_TR; |
|---|
| 88 | }else if(!strncmp(e->name, "td", 3)) { |
|---|
| 89 | e->type = ELEM_TD; |
|---|
| 90 | }else if(!strncmp(e->name, "text", 4)) { |
|---|
| 91 | e->type = ELEM_TEXT; |
|---|
| 92 | e->text = (char*)strdup((const char*)xmlNodeGetContent(node)); |
|---|
| 93 | strip_eol(e->text); |
|---|
| 94 | strip_spaces(&e->text); |
|---|
| 95 | }else { |
|---|
| 96 | e->type = ELEM_UNKNOW; |
|---|
| 97 | printf("Unknow tag '%s'\n", e->name); |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | if(node->properties) { |
|---|
| 101 | for(xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { |
|---|
| 102 | e->attributes = realloc(e->attributes, sizeof(gAttribute)*(e->attribute_count+1)); |
|---|
| 103 | e->attributes[e->attribute_count].name = strdup((const char*)attr->name); |
|---|
| 104 | e->attributes[e->attribute_count].value = strdup((char*)attr->children->content); |
|---|
| 105 | e->attribute_count++; |
|---|
| 106 | } |
|---|
| 107 | } |
|---|
| 108 | |
|---|
| 109 | return e; |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | |
|---|
| 113 | gNode* explore(gNode **elem, htmlNodePtr hnode, unsigned int level, gNode *parent) |
|---|
| 114 | { |
|---|
| 115 | unsigned int c = 0; |
|---|
| 116 | for(htmlNodePtr node = hnode; node != NULL; node = node->next) |
|---|
| 117 | { |
|---|
| 118 | gNode *child = add_node(node); |
|---|
| 119 | child->level = level; |
|---|
| 120 | |
|---|
| 121 | if(elem) elem = realloc(elem, sizeof(gNode*)*(c+1)); |
|---|
| 122 | else elem = malloc(sizeof(gNode*)); |
|---|
| 123 | |
|---|
| 124 | |
|---|
| 125 | elem[c] = child; |
|---|
| 126 | |
|---|
| 127 | c++; |
|---|
| 128 | |
|---|
| 129 | if(node->type == XML_ELEMENT_NODE) |
|---|
| 130 | { |
|---|
| 131 | if(node->children != NULL) { |
|---|
| 132 | explore(child->children, |
|---|
| 133 | node->children, |
|---|
| 134 | level+1, |
|---|
| 135 | child); |
|---|
| 136 | } else { |
|---|
| 137 | free(child->children); |
|---|
| 138 | child->children = NULL; |
|---|
| 139 | } |
|---|
| 140 | } |
|---|
| 141 | if(parent) { |
|---|
| 142 | parent->children = realloc(parent->children, sizeof(gNode*)*(parent->children_count+1)); |
|---|
| 143 | parent->children[parent->children_count] = child; |
|---|
| 144 | child->parent = parent; |
|---|
| 145 | parent->children_count++; |
|---|
| 146 | } |
|---|
| 147 | } |
|---|
| 148 | return *elem; |
|---|
| 149 | } |
|---|
| 150 | |
|---|
| 151 | |
|---|
| 152 | void destroy_node(gNode *elem) { |
|---|
| 153 | if(!elem) return; |
|---|
| 154 | |
|---|
| 155 | unsigned int a; |
|---|
| 156 | for(a = 0; a < elem->attribute_count; a++) { |
|---|
| 157 | if(elem->attributes[a].name) free(elem->attributes[a].name); |
|---|
| 158 | if(elem->attributes[a].value) free(elem->attributes[a].value); |
|---|
| 159 | } |
|---|
| 160 | if(elem->attributes) free(elem->attributes); |
|---|
| 161 | if(elem->text) free(elem->text); |
|---|
| 162 | if(elem->name) free(elem->name); |
|---|
| 163 | |
|---|
| 164 | unsigned int i; |
|---|
| 165 | for(i=0; i < elem->children_count; i++) { |
|---|
| 166 | destroy_node(elem->children[i]); |
|---|
| 167 | elem->children[i] = NULL; |
|---|
| 168 | } |
|---|
| 169 | free(elem->children); |
|---|
| 170 | free(elem); |
|---|
| 171 | } |
|---|
| 172 | |
|---|
| 173 | #define LEVEL {unsigned int foo=0; for(foo=0; foo<elem->level; foo++) printf(" ");} |
|---|
| 174 | |
|---|
| 175 | void pretty_print(gNode *elem) { |
|---|
| 176 | if(!elem) { |
|---|
| 177 | printf("elem is %p\n", elem); |
|---|
| 178 | return; |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | if(elem->type == ELEM_TEXT) { |
|---|
| 182 | LEVEL printf("%s\n", elem->text); |
|---|
| 183 | } else { |
|---|
| 184 | LEVEL printf("<%s", elem->name); |
|---|
| 185 | unsigned int a; |
|---|
| 186 | for(a = 0; a < elem->attribute_count; a++) { |
|---|
| 187 | printf(" %s=\"%s\"", elem->attributes[a].name, elem->attributes[a].value); |
|---|
| 188 | } |
|---|
| 189 | printf(">\n"); |
|---|
| 190 | } |
|---|
| 191 | |
|---|
| 192 | unsigned int i; |
|---|
| 193 | for(i=0; i < elem->children_count; i++) { |
|---|
| 194 | pretty_print(elem->children[i]); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | if(elem->type == ELEM_TEXT) { |
|---|
| 198 | |
|---|
| 199 | } else { |
|---|
| 200 | LEVEL printf("</%s>\n", elem->name); |
|---|
| 201 | } |
|---|
| 202 | } |
|---|
| 203 | |
|---|