| 1 | /* |
| 2 | * Copyright (C) 2014 - Jonathan Rajotte <jonathan.r.julien@gmail.com> |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or modify it |
| 5 | * under the terms of the GNU General Public License, version 2 only, as |
| 6 | * published by the Free Software Foundation. |
| 7 | * |
| 8 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 11 | * more details. |
| 12 | * |
| 13 | * You should have received a copy of the GNU General Public License along with |
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
| 15 | */ |
| 16 | |
| 17 | /* |
 * Usage: extract_xml [-v] xml_path xpath_expression
 * Evaluates an XPath expression and prints the resulting node set.
 * xml_path: path to the xml file
 * xpath_expression: xpath expression to extract
 * If -v is set, the name of each node is printed together with its value,
 * each followed by a semicolon (;).
| 24 | * Ex: |
| 25 | * Command:extract_xml ../file.xml /test/node/text() |
| 26 | * Output: |
| 27 | * a |
| 28 | * b |
| 29 | * c |
| 30 | * With -v |
| 31 | * node;a; |
| 32 | * node;b; |
| 33 | * node;c; |
| 34 | */ |
| 35 | #include <stdlib.h> |
| 36 | #include <stdio.h> |
| 37 | #include <string.h> |
| 38 | #include <assert.h> |
| 39 | #include <unistd.h> |
| 40 | |
| 41 | #include <libxml/tree.h> |
| 42 | #include <libxml/parser.h> |
| 43 | #include <libxml/xpath.h> |
| 44 | #include <libxml/xpathInternals.h> |
| 45 | |
| 46 | #if defined(LIBXML_XPATH_ENABLED) |
| 47 | |
| 48 | |
| 49 | int opt_verbose; |
| 50 | /** |
| 51 | * print_xpath_nodes: |
| 52 | * nodes: the nodes set. |
| 53 | * output: the output file handle. |
| 54 | * |
| 55 | * Print the node content to the file |
| 56 | */ |
| 57 | static int print_xpath_nodes(xmlDocPtr doc, xmlNodeSetPtr nodes, FILE *output) |
| 58 | { |
| 59 | int ret; |
| 60 | int size; |
| 61 | int i; |
| 62 | |
| 63 | xmlNodePtr cur; |
| 64 | xmlChar *node_child_value_string = NULL; |
| 65 | |
| 66 | assert(output); |
| 67 | size = (nodes) ? nodes->nodeNr : 0; |
| 68 | |
| 69 | for (i = 0; i < size; ++i) { |
| 70 | assert(nodes->nodeTab[i]); |
| 71 | |
| 72 | if (nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { |
| 73 | fprintf(stderr, "ERR:%s\n", |
| 74 | "This executable does not support xml namespacing\n"); |
| 75 | ret = -1; |
| 76 | goto end; |
| 77 | } else if (nodes->nodeTab[i]->type == XML_ELEMENT_NODE) { |
| 78 | cur = nodes->nodeTab[i]; |
| 79 | |
| 80 | if (xmlChildElementCount(cur) == 0) { |
| 81 | if (xmlNodeIsText(cur->children)) { |
| 82 | node_child_value_string = xmlNodeListGetString(doc, |
| 83 | cur->children, 1); |
| 84 | if (opt_verbose) { |
| 85 | fprintf(output, "%s;%s;\n", cur->name, |
| 86 | node_child_value_string); |
| 87 | } else { |
| 88 | fprintf(output, "%s\n", |
| 89 | node_child_value_string); |
| 90 | } |
| 91 | xmlFree(node_child_value_string); |
| 92 | } else { |
| 93 | /* We don't want to print non-final element */ |
| 94 | fprintf(stderr, "ERR:%s\n", |
| 95 | "Xpath expression return non-final xml element"); |
| 96 | ret = -1; |
| 97 | goto end; |
| 98 | } |
| 99 | } else { |
| 100 | /* We don't want to print non-final element */ |
| 101 | fprintf(stderr, "ERR:%s\n", |
| 102 | "Xpath expression return non-final xml element"); |
| 103 | ret = -1; |
| 104 | goto end; |
| 105 | } |
| 106 | |
| 107 | } else { |
| 108 | cur = nodes->nodeTab[i]; |
| 109 | if (opt_verbose) { |
| 110 | fprintf(output, "%s;%s;\n", cur->parent->name, cur->content); |
| 111 | } else { |
| 112 | fprintf(output, "%s\n", cur->content); |
| 113 | |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | /* Command Success */ |
| 118 | ret = 0; |
| 119 | |
| 120 | end: |
| 121 | return ret; |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | * Extract element corresponding to xpath |
| 126 | * xml_path The path to the xml file |
| 127 | * xpath: The xpath to evaluate. |
| 128 | * |
| 129 | * Evaluate an xpath expression onto an xml file. |
| 130 | * and print the result one by line. |
| 131 | * |
| 132 | * Returns 0 on success and a negative value otherwise. |
| 133 | */ |
| 134 | static int extract_xpath(const char *xml_path, const xmlChar *xpath) |
| 135 | { |
| 136 | xmlDocPtr doc = NULL; |
| 137 | xmlXPathContextPtr xpathCtx = NULL; |
| 138 | xmlXPathObjectPtr xpathObj = NULL; |
| 139 | |
| 140 | assert(xml_path); |
| 141 | assert(xpath); |
| 142 | |
| 143 | /* Parse the xml file */ |
| 144 | doc = xmlParseFile(xml_path); |
| 145 | if (!doc) { |
| 146 | fprintf(stderr, "ERR parsing: xml file invalid \"%s\"\n", xml_path); |
| 147 | return -1; |
| 148 | } |
| 149 | |
| 150 | /* Initialize a xpath context */ |
| 151 | xpathCtx = xmlXPathNewContext(doc); |
| 152 | if (!xpathCtx) { |
| 153 | fprintf(stderr, "ERR: XPath context invalid\n"); |
| 154 | xmlFreeDoc(doc); |
| 155 | return -1; |
| 156 | } |
| 157 | |
| 158 | /* Evaluate xpath expression */ |
| 159 | xpathObj = xmlXPathEvalExpression(xpath, xpathCtx); |
| 160 | if (!xpathObj) { |
| 161 | fprintf(stderr, "ERR: invalid xpath expression \"%s\"\n", xpath); |
| 162 | xmlXPathFreeContext(xpathCtx); |
| 163 | xmlFreeDoc(doc); |
| 164 | return -1; |
| 165 | } |
| 166 | |
| 167 | /* Print results */ |
| 168 | if (print_xpath_nodes(doc, xpathObj->nodesetval, stdout)) { |
| 169 | xmlXPathFreeObject(xpathObj); |
| 170 | xmlXPathFreeContext(xpathCtx); |
| 171 | xmlFreeDoc(doc); |
| 172 | return -1; |
| 173 | } |
| 174 | |
| 175 | /* Cleanup */ |
| 176 | xmlXPathFreeObject(xpathObj); |
| 177 | xmlXPathFreeContext(xpathCtx); |
| 178 | xmlFreeDoc(doc); |
| 179 | |
| 180 | return 0; |
| 181 | } |
| 182 | |
| 183 | int main(int argc, char **argv) |
| 184 | { |
| 185 | int opt; |
| 186 | |
| 187 | /* Parse command line and process file */ |
| 188 | while ((opt = getopt(argc, argv, "v")) != -1) { |
| 189 | switch (opt) { |
| 190 | case 'v': |
| 191 | opt_verbose = 1; |
| 192 | break; |
| 193 | default: |
| 194 | abort(); |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | if (!(optind + 1 < argc)) { |
| 199 | fprintf(stderr, "ERR:%s\n", "Arguments missing"); |
| 200 | return -1; |
| 201 | } |
| 202 | |
| 203 | /* Init libxml */ |
| 204 | xmlInitParser(); |
| 205 | xmlKeepBlanksDefault(0); |
| 206 | if (access(argv[optind], F_OK)) { |
| 207 | fprintf(stderr, "ERR:%s\n", "Xml path not valid"); |
| 208 | return -1; |
| 209 | } |
| 210 | /* Do the main job */ |
| 211 | if (extract_xpath(argv[optind], (xmlChar *)argv[optind+1])) { |
| 212 | return -1; |
| 213 | } |
| 214 | |
| 215 | /* Shutdown libxml */ |
| 216 | xmlCleanupParser(); |
| 217 | |
| 218 | return 0; |
| 219 | } |
| 220 | |
| 221 | #else |
/*
 * Fallback entry point compiled when LIBXML_XPATH_ENABLED is not defined:
 * report the missing feature on stderr and fail.
 */
int main(void)
{
	fputs("XPath support not compiled in\n", stderr);
	return -1;
}
| 227 | #endif |