1 | /* |
---|
2 | * Gaycko Text mode web browser |
---|
3 | * Copyright (c) 2011 Jean-Yves Lamoureux <jylam@lnxscene.org> |
---|
4 | * All Rights Reserved |
---|
5 | * |
---|
6 | * This library is free software. It comes without any warranty, to |
---|
7 | * the extent permitted by applicable law. You can redistribute it |
---|
8 | * and/or modify it under the terms of the Do What The Fuck You Want |
---|
9 | * To Public License, Version 2, as published by Sam Hocevar. See |
---|
10 | * http://sam.zoy.org/wtfpl/COPYING for more details. |
---|
11 | */ |
---|
12 | #include "parse.h" |
---|
13 | |
---|
14 | gDOM *gaycko_parse(char *data, unsigned int size) { |
---|
15 | |
---|
16 | /* Clean up and repair bad HTML */ |
---|
17 | TidyDoc tdoc = tidyCreate(); |
---|
18 | TidyBuffer errbuf = {0}; |
---|
19 | |
---|
20 | TidyBuffer output = {0}; |
---|
21 | |
---|
22 | Bool ok; |
---|
23 | int rc = -1; |
---|
24 | |
---|
25 | ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); // Convert to XHTML |
---|
26 | if ( ok ) |
---|
27 | rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics |
---|
28 | if ( rc >= 0 ) |
---|
29 | rc = tidyParseString( tdoc, data ); // Parse the input |
---|
30 | if ( rc >= 0 ) |
---|
31 | rc = tidyCleanAndRepair( tdoc ); // Tidy it up! |
---|
32 | if ( rc > 1 ) // If error, force output. |
---|
33 | rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); |
---|
34 | if ( rc >= 0 ) |
---|
35 | rc = tidySaveBuffer( tdoc, &output ); // Pretty Print |
---|
36 | |
---|
37 | /* Actual parsing */ |
---|
38 | htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL); |
---|
39 | |
---|
40 | /* Release tidy document */ |
---|
41 | tidyRelease( tdoc ); |
---|
42 | |
---|
43 | /* Convert libxml2's tree to our own DOM */ |
---|
44 | gDOM *dom = gaycko_convert_dom(doc); |
---|
45 | |
---|
46 | /* Free libxml2 tree */ |
---|
47 | xmlFreeDoc(doc); |
---|
48 | |
---|
49 | return dom; |
---|
50 | } |
---|
51 | |
---|