source: gaycko/trunk/src/parse.c @ 4860

Last change on this file since 4860 was 4860, checked in by Jean-Yves Lamoureux, 8 years ago
  • reordering
File size: 1.5 KB
Line 
1/*
2 *  Gaycko        Text mode web browser
3 *  Copyright (c) 2011 Jean-Yves Lamoureux <jylam@lnxscene.org>
4 *                All Rights Reserved
5 *
6 *  This library is free software. It comes without any warranty, to
7 *  the extent permitted by applicable law. You can redistribute it
8 *  and/or modify it under the terms of the Do What The Fuck You Want
9 *  To Public License, Version 2, as published by Sam Hocevar. See
10 *  http://sam.zoy.org/wtfpl/COPYING for more details.
11 */
12#include "parse.h"
13
14gDOM *gaycko_parse(char *data, unsigned int size) {
15
16        /* Clean up and repair bad HTML */
17        TidyDoc tdoc = tidyCreate();
18        TidyBuffer errbuf = {0};
19
20        TidyBuffer output = {0};
21
22        Bool ok;
23        int rc = -1;
24
25        ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
26        if ( ok )
27                rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
28        if ( rc >= 0 )
29                rc = tidyParseString( tdoc, data );           // Parse the input
30        if ( rc >= 0 )
31                rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
32        if ( rc > 1 )                                    // If error, force output.
33                rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
34        if ( rc >= 0 )
35                rc = tidySaveBuffer( tdoc, &output );          // Pretty Print
36
37        /* Actual parsing */
38        htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL);
39       
40        /* Release tidy document */
41        tidyRelease( tdoc );
42
43        /* Convert libxml2's tree to our own DOM */
44        gDOM *dom = gaycko_convert_dom(doc);
45
46        /* Free libxml2 tree */
47        xmlFreeDoc(doc);
48
49        return dom;
50}
51
Note: See TracBrowser for help on using the repository browser.