1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 ***************************************************************************/
23 * Get a web page, extract the title with libxml.
26 Written by Lars Nilsson
28 GNU C++ compile command line suggestion (edit paths accordingly):
30 g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
31 -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
37 #include <curl/curl.h>
38 #include <libxml/HTMLparser.h>
41 // Case-insensitive string comparison
// COMPARE(a, b) evaluates to true when the two C strings are equal ignoring
// letter case: the result of the underlying comparison call is negated, so
// a "no difference" result of 0 becomes true.
// NOTE(review): two alternative definitions follow; the conditional that
// selects between them is not visible here - presumably _stricmp is the
// MSVC spelling and strcasecmp the POSIX one. Confirm.
45 #define COMPARE(a, b) (!_stricmp((a), (b)))
47 #define COMPARE(a, b) (!strcasecmp((a), (b)))
51 // libxml callback context structure
// The SAX callbacks below receive a pointer to this object as their user
// data and use addTitle/title to collect the text of the TITLE element.
56 Context(): addTitle(false) { } // a fresh context starts with addTitle off
63 // libcurl variables for error strings and returned data
// errorBuffer: registered with CURLOPT_ERRORBUFFER in init() and printed in
// the failure messages of init() and main().
65 static char errorBuffer[CURL_ERROR_SIZE];
// buffer: registered with CURLOPT_WRITEDATA in init(); writer() appends the
// received data to it and main() hands it to parseHtml().
66 static std::string buffer;
69 // libcurl write callback function
// Appends the size*nmemb bytes found at `data` to the std::string passed as
// writerData (init() registers &buffer for this through CURLOPT_WRITEDATA).
// NOTE(review): declared to return int, while libcurl documents the write
// callback as returning size_t - confirm and consider changing the type.
72 static int writer(char *data, size_t size, size_t nmemb,
73 std::string *writerData)
75 if(writerData == NULL) // the destination pointer is checked for NULL before the append
78 writerData->append(data, size*nmemb); // size*nmemb is the length of this chunk
84 // libcurl connection initialization
// Creates an easy handle, stores it in `conn` (a reference parameter, so the
// caller receives the handle) and configures it: error buffer, target URL,
// redirect following, and the write callback together with the string that
// receives the downloaded data. The result of every curl_easy_setopt call
// is compared against CURLE_OK and a diagnostic is written to stderr when
// it differs.
87 static bool init(CURL *&conn, char *url)
91 conn = curl_easy_init();
94 fprintf(stderr, "Failed to create CURL connection\n");
98 code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
99 if(code != CURLE_OK) {
// This message prints the numeric code rather than errorBuffer, presumably
// because the error buffer is not installed when this call fails.
100 fprintf(stderr, "Failed to set error buffer [%d]\n", code);
104 code = curl_easy_setopt(conn, CURLOPT_URL, url);
105 if(code != CURLE_OK) {
106 fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
110 code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L); // 1L switches the redirect option on
111 if(code != CURLE_OK) {
112 fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
116 code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer); // received data is delivered to writer()
117 if(code != CURLE_OK) {
118 fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
122 code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer); // writer() receives &buffer as writerData
123 if(code != CURLE_OK) {
124 fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
132 // libxml start element callback function
// Switches title collection on when the element name matches "TITLE"
// (compared case-insensitively through COMPARE).
135 static void StartElement(void *voidContext,
137 const xmlChar **attributes)
139 Context *context = (Context *)voidContext; // user data is the Context handed to the parser
141 if(COMPARE((char *)name, "TITLE")) {
143 context->addTitle = true;
149 // libxml end element callback function
// Switches title collection off again when the element name matches "TITLE"
// (compared case-insensitively through COMPARE).
152 static void EndElement(void *voidContext,
155 Context *context = (Context *)voidContext; // user data is the Context handed to the parser
157 if(COMPARE((char *)name, "TITLE"))
158 context->addTitle = false;
162 // Text handling helper function
// Shared by the Characters and cdata callbacks. Appends `length` characters
// starting at `chars` to context->title, but only while context->addTitle
// is set; otherwise the text is not stored.
165 static void handleCharacters(Context *context,
166 const xmlChar *chars,
169 if(context->addTitle)
170 context->title.append((char *)chars, length);
174 // libxml PCDATA callback function
// Casts the user data back to Context and forwards the text to
// handleCharacters().
177 static void Characters(void *voidContext,
178 const xmlChar *chars,
181 Context *context = (Context *)voidContext;
183 handleCharacters(context, chars, length);
187 // libxml CDATA callback function
// Casts the user data back to Context and forwards the text to
// handleCharacters(), exactly as the PCDATA callback does.
190 static void cdata(void *voidContext,
191 const xmlChar *chars,
194 Context *context = (Context *)voidContext;
196 handleCharacters(context, chars, length);
200 // libxml SAX callback structure
// parseHtml() passes the address of this object to
// htmlCreatePushParserCtxt().
203 static htmlSAXHandler saxHandler =
235 // Parse given (assumed to be) HTML text and return the title
// The text in `html` is pushed through a libxml HTML push parser that is
// wired to saxHandler, with a Context object as the callbacks' user data.
// After parsing, context.title is copied into the `title` output parameter.
// NOTE(review): the parser context returned by htmlCreatePushParserCtxt is
// used without a visible NULL check, and html.size() is passed as the chunk
// length - confirm both are acceptable for the expected inputs.
238 static void parseHtml(const std::string &html,
241 htmlParserCtxtPtr ctxt;
244 ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
245 XML_CHAR_ENCODING_NONE);
247 htmlParseChunk(ctxt, html.c_str(), html.size(), 0); // the whole document goes in as one chunk
248 htmlParseChunk(ctxt, "", 0, 1); // empty chunk with the final argument set to 1 (the terminate flag in libxml2's API)
250 htmlFreeParserCtxt(ctxt);
252 title = context.title;
// Program entry point: takes a URL as argv[1], downloads it with libcurl
// into the global `buffer`, extracts the title with parseHtml() and prints
// it to stdout. Failures are reported on stderr.
255 int main(int argc, char *argv[])
261 // Ensure one argument is given
264 fprintf(stderr, "Usage: %s <url>\n", argv[0]);
// NOTE(review): the result of curl_global_init is not captured here.
268 curl_global_init(CURL_GLOBAL_DEFAULT);
270 // Initialize CURL connection
272 if(!init(conn, argv[1])) {
// NOTE(review): the message below misspells "initialization".
273 fprintf(stderr, "Connection initializion failed\n");
277 // Retrieve content for the URL
279 code = curl_easy_perform(conn);
280 curl_easy_cleanup(conn); // the handle is released before the transfer result is inspected
282 if(code != CURLE_OK) {
283 fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
287 // Parse the (assumed) HTML code
288 parseHtml(buffer, title);
290 // Display the extracted title
291 printf("Title: %s\n", title.c_str());