source: gaycko/trunk/src/parsing/parse.c @ 4748

Last change on this file since 4748 was 4748, checked in by Jean-Yves Lamoureux, 10 years ago
  • Added copyright notices
File size: 1.5 KB
Line 
1/*
2 *  Gaycko        Text mode web browser
3 *  Copyright (c) 2011 Jean-Yves Lamoureux <jylam@lnxscene.org>
4 *                All Rights Reserved
5 *
6 *  This library is free software. It comes without any warranty, to
7 *  the extent permitted by applicable law. You can redistribute it
8 *  and/or modify it under the terms of the Do What The Fuck You Want
9 *  To Public License, Version 2, as published by Sam Hocevar. See
10 *  http://sam.zoy.org/wtfpl/COPYING for more details.
11 */
12#include "parse.h"
13
14
15gDOM *gaycko_parse(char *data, unsigned int size) {
16
17        /* Clean up and repair bad HTML */
18        TidyDoc tdoc = tidyCreate();
19        TidyBuffer errbuf = {0};
20
21        TidyBuffer output = {0};
22
23        Bool ok;
24        int rc = -1;
25
26        ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
27        if ( ok )
28                rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
29        if ( rc >= 0 )
30                rc = tidyParseString( tdoc, data );           // Parse the input
31        if ( rc >= 0 )
32                rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
33        if ( rc > 1 )                                    // If error, force output.
34                rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
35        if ( rc >= 0 )
36                rc = tidySaveBuffer( tdoc, &output );          // Pretty Print
37
38        /* Actual parsing */
39        htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL);
40       
41        /* Release tidy document */
42        tidyRelease( tdoc );
43
44        /* Convert libxml2's tree to our own DOM */
45        gDOM *dom = gaycko_convert_dom(doc);
46
47        /* Free libxml2 tree */
48        xmlFreeDoc(doc);
49
50        return dom;
51}
52
Note: See TracBrowser for help on using the repository browser.