/*
 * Gaycko Text mode web browser
 * Copyright (c) 2011 Jean-Yves Lamoureux <jylam@lnxscene.org>
 * All Rights Reserved
 *
 * This library is free software. It comes without any warranty, to
 * the extent permitted by applicable law. You can redistribute it
 * and/or modify it under the terms of the Do What The Fuck You Want
 * To Public License, Version 2, as published by Sam Hocevar. See
 * http://sam.zoy.org/wtfpl/COPYING for more details.
 */
---|
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dom.h"
#include "helpers/str.h"
---|
17 | |
---|
18 | gDOM *gaycko_convert_dom(htmlDocPtr doc) { |
---|
19 | |
---|
20 | gDOM *dom = malloc(sizeof(gDOM)); |
---|
21 | dom->root = NULL; |
---|
22 | htmlNodePtr root = xmlDocGetRootElement(doc); |
---|
23 | gNode **r = NULL; |
---|
24 | gNode *ret = NULL; |
---|
25 | |
---|
26 | if(root != NULL) |
---|
27 | { |
---|
28 | ret = explore(r, root, 0, NULL); |
---|
29 | } |
---|
30 | |
---|
31 | dom->root = ret; |
---|
32 | |
---|
33 | return dom; |
---|
34 | } |
---|
35 | |
---|
36 | |
---|
37 | gNode *add_node(htmlNodePtr node) { |
---|
38 | |
---|
39 | gNode *e; |
---|
40 | e = malloc(sizeof(gNode)); |
---|
41 | e->children_count = 0; |
---|
42 | e->children = NULL; |
---|
43 | e->attribute_count = 0; |
---|
44 | e->attributes = NULL; |
---|
45 | e->text = NULL; |
---|
46 | |
---|
47 | e->properties = malloc(sizeof(gProperty)); |
---|
48 | e->properties->x = 0; |
---|
49 | e->properties->y = 0; |
---|
50 | e->properties->width = 0; |
---|
51 | e->properties->height = 0; |
---|
52 | |
---|
53 | if(!node->name) { |
---|
54 | e->name = strdup("UNKNOW"); |
---|
55 | e->type = ELEM_UNKNOW; |
---|
56 | return e; |
---|
57 | } |
---|
58 | |
---|
59 | e->name = (char*)strdup((const char*)node->name); |
---|
60 | if(!strncmp(e->name, "html", 4)) { |
---|
61 | e->type = ELEM_HTML; |
---|
62 | }else if(!strncmp(e->name, "head", 4)) { |
---|
63 | e->type = ELEM_HEAD; |
---|
64 | }else if(!strncmp(e->name, "title", 5)) { |
---|
65 | e->type = ELEM_TITLE; |
---|
66 | }else if(!strncmp(e->name, "meta", 4)) { |
---|
67 | e->type = ELEM_META; |
---|
68 | }else if(!strncmp(e->name, "body", 4)) { |
---|
69 | e->type = ELEM_BODY; |
---|
70 | }else if(!strncmp(e->name, "h1", 2)) { |
---|
71 | e->type = ELEM_H1; |
---|
72 | }else if(!strncmp(e->name, "h2", 2)) { |
---|
73 | e->type = ELEM_H2; |
---|
74 | }else if(!strncmp(e->name, "h3", 2)) { |
---|
75 | e->type = ELEM_H3; |
---|
76 | }else if(!strncmp(e->name, "hr", 2)) { |
---|
77 | e->type = ELEM_HR; |
---|
78 | }else if(!strncmp(e->name, "br", 2) || !strncmp(e->name, "br ", 3)) { |
---|
79 | e->type = ELEM_BR; |
---|
80 | }else if(!strncmp(e->name, "p", 1)) { |
---|
81 | e->type = ELEM_P; |
---|
82 | }else if(!strncmp(e->name, "img", 3)) { |
---|
83 | e->type = ELEM_IMG; |
---|
84 | }else if(!strncmp(e->name, "table", 3)) { |
---|
85 | e->type = ELEM_TABLE; |
---|
86 | e->specific.table = malloc(sizeof(gTable)); |
---|
87 | e->specific.table->border = 0; |
---|
88 | e->specific.table->tr_count = 0; |
---|
89 | e->specific.table->tr = NULL; |
---|
90 | }else if(!strncmp(e->name, "tr", 3)) { |
---|
91 | e->type = ELEM_TR; |
---|
92 | e->specific.tr = malloc(sizeof(gTr)); |
---|
93 | e->specific.tr->border = 0; |
---|
94 | e->specific.tr->td_count = 0; |
---|
95 | e->specific.tr->td = NULL; |
---|
96 | }else if(!strncmp(e->name, "td", 3)) { |
---|
97 | e->type = ELEM_TD; |
---|
98 | }else if(!strncmp(e->name, "text", 4)) { |
---|
99 | e->type = ELEM_TEXT; |
---|
100 | e->text = (char*)strdup((const char*)xmlNodeGetContent(node)); |
---|
101 | strip_eol(e->text); |
---|
102 | strip_spaces(&e->text); |
---|
103 | }else { |
---|
104 | e->type = ELEM_UNKNOW; |
---|
105 | printf("Unknow tag '%s'\n", e->name); |
---|
106 | } |
---|
107 | |
---|
108 | if(node->properties) { |
---|
109 | for(xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { |
---|
110 | e->attributes = realloc(e->attributes, sizeof(gAttribute)*(e->attribute_count+1)); |
---|
111 | e->attributes[e->attribute_count].name = strdup((const char*)attr->name); |
---|
112 | e->attributes[e->attribute_count].value = strdup((char*)attr->children->content); |
---|
113 | e->attribute_count++; |
---|
114 | } |
---|
115 | } |
---|
116 | |
---|
117 | return e; |
---|
118 | } |
---|
119 | |
---|
120 | |
---|
121 | gNode* explore(gNode **elem, htmlNodePtr hnode, unsigned int level, gNode *parent) |
---|
122 | { |
---|
123 | unsigned int c = 0; |
---|
124 | for(htmlNodePtr node = hnode; node != NULL; node = node->next) |
---|
125 | { |
---|
126 | gNode *child = add_node(node); |
---|
127 | child->level = level; |
---|
128 | |
---|
129 | if(elem) elem = realloc(elem, sizeof(gNode*)*(c+1)); |
---|
130 | else elem = malloc(sizeof(gNode*)); |
---|
131 | |
---|
132 | |
---|
133 | elem[c] = child; |
---|
134 | |
---|
135 | c++; |
---|
136 | |
---|
137 | if(node->type == XML_ELEMENT_NODE) |
---|
138 | { |
---|
139 | if(node->children != NULL) { |
---|
140 | explore(child->children, |
---|
141 | node->children, |
---|
142 | level+1, |
---|
143 | child); |
---|
144 | } else { |
---|
145 | free(child->children); |
---|
146 | child->children = NULL; |
---|
147 | } |
---|
148 | } |
---|
149 | if(parent) { |
---|
150 | parent->children = realloc(parent->children, sizeof(gNode*)*(parent->children_count+1)); |
---|
151 | parent->children[parent->children_count] = child; |
---|
152 | child->parent = parent; |
---|
153 | parent->children_count++; |
---|
154 | } |
---|
155 | } |
---|
156 | return *elem; |
---|
157 | } |
---|
158 | |
---|
159 | |
---|
160 | void destroy_node(gNode *elem) { |
---|
161 | if(!elem) return; |
---|
162 | |
---|
163 | unsigned int a; |
---|
164 | for(a = 0; a < elem->attribute_count; a++) { |
---|
165 | if(elem->attributes[a].name) free(elem->attributes[a].name); |
---|
166 | if(elem->attributes[a].value) free(elem->attributes[a].value); |
---|
167 | } |
---|
168 | if(elem->attributes) free(elem->attributes); |
---|
169 | if(elem->text) free(elem->text); |
---|
170 | if(elem->name) free(elem->name); |
---|
171 | |
---|
172 | unsigned int i; |
---|
173 | for(i=0; i < elem->children_count; i++) { |
---|
174 | destroy_node(elem->children[i]); |
---|
175 | elem->children[i] = NULL; |
---|
176 | } |
---|
177 | free(elem->children); |
---|
178 | free(elem); |
---|
179 | } |
---|
180 | |
---|
181 | #define LEVEL {unsigned int foo=0; for(foo=0; foo<elem->level; foo++) printf(" ");} |
---|
182 | |
---|
183 | void pretty_print(gNode *elem) { |
---|
184 | if(!elem) { |
---|
185 | printf("elem is %p\n", elem); |
---|
186 | return; |
---|
187 | } |
---|
188 | |
---|
189 | if(elem->type == ELEM_TEXT) { |
---|
190 | LEVEL printf("%s\n", elem->text); |
---|
191 | } else { |
---|
192 | LEVEL printf("<%s", elem->name); |
---|
193 | unsigned int a; |
---|
194 | for(a = 0; a < elem->attribute_count; a++) { |
---|
195 | printf(" %s=\"%s\"", elem->attributes[a].name, elem->attributes[a].value); |
---|
196 | } |
---|
197 | printf(">\n"); |
---|
198 | } |
---|
199 | |
---|
200 | unsigned int i; |
---|
201 | for(i=0; i < elem->children_count; i++) { |
---|
202 | pretty_print(elem->children[i]); |
---|
203 | } |
---|
204 | |
---|
205 | if(elem->type == ELEM_TEXT) { |
---|
206 | |
---|
207 | } else { |
---|
208 | LEVEL printf("</%s>\n", elem->name); |
---|
209 | } |
---|
210 | } |
---|
211 | |
---|