001/****************************************************************************/ 002/* Copyright/Copyleft: 003 * 004 * For this source the LGPL Lesser General Public License, 005 * published by the Free Software Foundation is valid. 006 * It means: 007 * 1) You can use this source without any restriction for any desired purpose. 008 * 2) You can redistribute copies of this source to everybody. 009 * 3) Every user of this source, also the user of redistribute copies 010 * with or without payment, must accept this license for further using. 011 * 4) But the LPGL ist not appropriate for a whole software product, 012 * if this source is only a part of them. It means, the user 013 * must publish this part of source, 014 * but don't need to publish the whole source of the own product. 015 * 5) You can study and modify (improve) this source 016 * for own using or for redistribution, but you have to license the 017 * modified sources likewise under this LGPL Lesser General Public License. 018 * You mustn't delete this Copyright/Copyleft inscription in this source file. 019 * 020 * @author www.vishia.de/Java 021 * @version 2006-06-15 (year-month-day) 022 * list of changes: 023 * 2006-05-00: www.vishia.de creation 024 * 025 ****************************************************************************/ 026 027package org.vishia.xml; 028 029 030import java.util.*; 031import java.io.*; 032import java.nio.charset.Charset; 033 034import org.jdom.*; 035//import org.jdom.input.*; 036import org.jdom.input.SAXBuilder; 037import org.jdom.output.*; 038import org.jdom.transform.*; //JDOMInput, JdomOutput, Transformer contens not here! 039import javax.xml.transform.*; //Transformer 040import javax.xml.transform.stream.*; //StreamSource 041 042import org.vishia.mainCmd.Report; 043import org.vishia.xmlSimple.XmlException; 044 045/** The class XmlExtensions contains several methods to deal with XML data. 046 * It uses JDOM as basic access to XML. 
The JDOM library, downloadable from www.jdom.org, is required.
 * <br>
 * This class covers three groups of tasks:<br>
 * <ul><li>Access to inner parts of an element tree: This feature is implemented in the methods
 *         {@link #getChildren(Element, String)}, {@link #getTextFromPath(Element, String, boolean)}
 *         and {@link #setTextToPath(Element, String, String)}. With these methods a textually controlled access
 *         to any content inside elements is possible directly, without calling an XSL translator.
 *         The textually given paths are comparable to XPATH, but not equivalent.</li>
 *     <li>Read and write whole XML trees from files and to files. The beautification and handling of
 *         white spaces are topics of the appropriate methods read...() and write...().</li>
 *     <li>Wrappers to call the XSL translator: {@link #xslTransformString(Element, File)} and {@link #xslTransformXml(Element, File)}.
 *         The purpose of these wrappers is to catch exceptions and to supply detached xml elements (not bound to a document)
 *         as result of the XSL translation.</li>
 * </ul>
 * <br>
 * The beautification and white space processing follows the necessities of document text processing.
 * It is an inherent problem of XML that spaces and new lines are determined both by the output format (beautification)
 * and by the content, and the two are not distinguishable. Therefore some XML formats are written without any beautification,
 * but such XML files are arduous to read for a human. In document texts, however, no line feed should be present because line feeds
 * are not part of the information itself, they are part of the appearance of the information at printing.
 * The same holds for spaces: Only a single space in a series may be part of the information itself. Further white spaces are not.
 * <br>
 * Therefore any white space in the input may be reduced to one space after reading from file.
The routing 069 * {@link replaceWhiteSpaceWith1Space(Element, boolean)} implements this feature. The reduced input xml tree may be processed 070 * by any XSL translation or other XML processing in Java. The output may be beautificated for better readability by human. 071 * But the spaces of text content may be beware. The beautification is only made respecitvely to the superior organisation elements. 072 * The methods {@link beautificationBewareTextContent(Element)} and the {@link writeXmlBeautificatedTextFile(Element, File)} do so. 073 * <br> 074 * <br> 075 * Most of the methods are static, because no internal data are necessary. The output methods 076 * uses settings of some properties: encoding, indent mode, so they are based on a instance of XmlExtensions. 077 * A instance of XmlExtensions will be created by the static methods {@link create(Charset)}, 078 * the constructors are private. This is a design attempt ajar to factory patterns. 079 * The using of <code>new XmlExtension(...)</code> instead <code>create(...)</code> will be 080 * adequate, but here create() is the decision. 081 * 082<hr/> 083<pre> 084date who change 0852007-01-02 JcHartmut create() with Charset, some methods will be non static. 0862006-03-07 JcHartmut beautification, replaceWhiteSpaceWith1Space. 0872005-06-00 JcHartmut initial revision 088* 089</pre> 090<hr/> 091 */ 092public class XmlExtensions 093{ 094 /** The data held in the subclass.*/ 095 //private XmlMode mode; 096 097 /** The standard charset of this instance.*/ 098 //private static String sEncodingStandard = "ISO-8859-1"; 099 100 /** The xhtml namespace with prefix xhtml: xmlns:xhtml="http://www.w3.org/1999/xhtml" */ 101 //public final static Namespace xhtml = Namespace.getNamespace("xhtml","http://www.w3.org/1999/xhtml"); 102 103 /** Class hold some properties of transformation. 
104 */ 105 public static class XmlMode 106 { 107 private static final int kConvertText = 1; 108 109 /** If this bit is setted in convertMode, a XML-output, 110 * not textual output ist produced. 111 */ 112 private static final int mConvertXml = 0x10; 113 114 /** The XML-output-conversion should be written in UFT8-Form. 115 * @deprecated 116 * */ 117 private static final int kConvertXmlUTF8 = mConvertXml + 0x0; 118 119 /** The XML-output-conversion should be written in ISO8859-Form. 120 * @deprecated 121 * */ 122 private static final int kConvertXmlIso8859 = mConvertXml + 0x1; 123 124 /** The XML-output-conversion should be written in ISO8859-Form. 125 * @deprecated 126 * */ 127 private static final int kConvertXmlASCII = mConvertXml + 0x2; 128 129 /** The encoding charset of this instance.*/ 130 private String sEncoding = "ISO-8859-1"; 131 132 133 private int convertMode = kConvertText; 134 135 /** The indent, may be "" or null if no wrapping. */ 136 private String sIndent = " "; 137 138 /** 139 * Sets the indent for wrapping. 140 * @param sIndent null if no wrapping, "" if wrapping without indent. 141 */ 142 public void setIndent(String sIndent){ this.sIndent = sIndent; } 143 144 public void setText() { convertMode = kConvertText; } 145 146 /**Sets the encoding to the given encoding. 147 */ 148 public void setEncoding(Charset encoding) 149 { this.sEncoding=encoding.name(); 150 } 151 152 /**Sets the encoding to UTF8. UTF8 is a standard supporting all char sets. 153 */ 154 public void setXmlUTF8() { convertMode = kConvertXmlUTF8; sEncoding="UTF-8"; } 155 156 157 /**Sets the encoding to ISO-8849-1. This is the standard char set on windows systems in west european. 158 */ 159 public void setXmlIso8859(){ convertMode = kConvertXmlIso8859; sEncoding = "ISO-8859-1";} 160 161 /**Sets the encoding to US-ASCII. This is the minimal standard char set, using only 7-bit-chars. 
162 */ 163 public void setXmlASCII() { convertMode = kConvertXmlASCII; sEncoding = "US-ASCII"; } 164 165 public boolean isText(){ return convertMode == kConvertText; } 166 public boolean isXml(){ return (convertMode & mConvertXml) == mConvertXml; } 167 public boolean isIndent(){ return sIndent != null; } 168 public String getIndent(){ return sIndent; } 169 170 /** Returns the encoding as input for XmlExtensions.writeXmlFile(xmlTree, sFileName, <b>encoding<b>) 171 * @deprecated 172 */ 173 public int xxxgetEncoding() 174 throws XmlException 175 { switch(convertMode) 176 { case kConvertXmlUTF8 : return XmlExtensions.kEncoding_UTF_8; 177 case kConvertXmlIso8859: return XmlExtensions.kEncoding_ISO_8859_1; 178 default : throw new XmlException("getEncoding? - but unknown"); //, Report.exitWithArgumentError); 179 } 180 } 181 182 public String getEncoding() 183 { return sEncoding; 184 } 185 } 186 187 188 189 /** Internal constant for encoding the xml file. 190 * It was an idea to code the encoding not with a string, but with a constant. 191 * But this system is not extensible, the using of java.nio.charset.Charset 192 * is the better decision. See {@link create(Charset)}. 193 * @deprecated 194 */ 195 public final static int kEncoding_ISO_8859_1 = 1; 196 197 /** Internal constant for encoding the xml file. 198 * @deprecated */ 199 public final static int kEncoding_UTF_8 = 2; 200 /** Internal constant for encoding the xml file. 201 * @deprecated */ 202 public final static int kEncoding_ASCII = 3; 203 204 205 206 207 208 209 /** Gets the text content from the adressed node relativ to the given Element. 210 * This method is useable conveniently if the path is given in textual form 211 * outside the directly programming in java, but at example with control files (may be in XML) 212 * or from a conversion of XML data to a java routine (code generation). 
213 <br> 214 The Path, param sXmlPath, may be given in the followed form, 215 <br> 216 example: "../element/@attribute" or "@name+$+{Parameter|@name|$}" <br> 217 <table border="1"><tr><th>sample</th><th>meaning</th></tr> 218 <tr><td><code>name </code></td><td>Normalized text content of the element</td></tr> 219 <tr><td><code>@name </code></td><td>content of the attribute</td></tr> 220 <tr><td><code>name/child </code></td><td>Normalized text content of the child element</td></tr> 221 <tr><td><code>name/@attr </code></td><td>content of the attribute within the element</td></tr> 222 <tr><td><code>name/name/@attr </code></td><td>The path can be in any deepness</td></tr> 223 <tr><td><code>../../name </code></td><td>parent from parent</td></tr> 224 <tr><td><code>"text" </code></td> 225 <td>the constant text between "". It may be a part of the path concating with other parts with +, 226 but it is also possible to return a text directly, without using the input xml element. 227 This choice is conveniently, because the behaviour may be determined outside of a java programming, 228 and no special case will be produced if only a simple text is expected.</td> 229 </tr> 230 <tr><td><code>path+"text"+path </code></td><td>concation between the texts from pathes</td></tr> 231 <tr><td><code>path+c+path </code></td><td>1 char between + is a added separator, it is the same as <code>+"c"+</code></td></tr> 232 <tr><td><code>path+c </code></td><td>1 char on end after +, same as here <code>+"."</code></td></tr> 233 <tr><td><code>{apath|vpath|c} </code></td> 234 <td>concation of all textes from <code><i>vpath</i></code> from all elements found with <code><i>apath</i></code>, 235 with <code><i>c</i></code> as separator between the concatenated result strings. <br> 236 With using this expression as part of sXmlPath it is possible to concatenate the content of 237 more as one elements without using of a extra programmed loop. 
238 </td> 239 </tr> 240 </table> 241 @param xml The Element the path starts from. 242 @param sXmlPath The path expression see above. 243 @param bInvalidIsOk If true, than return "" on invalid path, if false than return null on invalid path. 244 @return The requested text content, "" or null if the path is not valid. 245 */ 246 @SuppressWarnings("unchecked") 247 public static String getTextFromPath(Element xml, String sXmlPath, boolean bInvalidIsOk) 248 { 249 boolean bDebug = false; //sXmlPath.startsWith(".."); 250 StringBuffer sText = new StringBuffer(100); 251 boolean bError = false; 252 Element xmlChild = xml; 253 if(bDebug) System.out.println(sXmlPath); 254 int pos1 = 0; 255 while(pos1 < sXmlPath.length()) 256 { //concat 257 char cc; 258 if( (cc = sXmlPath.charAt(pos1)) == '\'' || cc == '\"') 259 { 260 int posEnd = sXmlPath.indexOf(cc, pos1+1); //same char either "..." or '...' 261 if(posEnd < 0) posEnd = sXmlPath.length(); 262 sText.append(sXmlPath.substring(pos1+1, posEnd)); 263 pos1 = posEnd +1; 264 if(pos1 < sXmlPath.length()) 265 { pos1 = sXmlPath.indexOf('+', pos1); //search + as concation char 266 if(pos1 < 0) pos1 = sXmlPath.length(); //no further concation. 
267 } 268 } 269 else if(pos1 < (sXmlPath.length()-1) && sXmlPath.charAt(pos1+1) == '+' 270 || pos1 == (sXmlPath.length()-1) //the last char 271 ) 272 { //$+ simple char before concation is a concation char 273 sText.append(sXmlPath.charAt(pos1)); 274 pos1 += 2; //may be pos1 >= length() 275 } 276 else if(sXmlPath.charAt(pos1) == '{') pos1 = getMultiText(sText, xml, sXmlPath, pos1); 277 else 278 { //text from path 279 int posEnd = sXmlPath.indexOf('+', pos1); 280 if(posEnd < 0) posEnd = sXmlPath.length(); //no concation: end of string 281 int posSep = sXmlPath.lastIndexOf('/', posEnd-1); 282 if(posSep > 0) 283 { if(bDebug) System.out.println(" child:"+ sXmlPath.substring(pos1, posSep)+":"); 284 List listChildren = getChildren(xml, sXmlPath.substring(pos1, pos1 + posSep)); 285 xmlChild = (listChildren.size()>0 ? (Element)(listChildren.get(0)) : null); 286 pos1 = posSep +1; 287 } 288 else xmlChild = xml; 289 290 if(xmlChild == null) bError = true; //1 error force return null if !bInvalidIsOk 291 else 292 { 293 if(sXmlPath.charAt(pos1) == '@') 294 { String sContent = xmlChild.getAttributeValue(sXmlPath.substring(pos1+1, posEnd)); 295 if(bDebug) System.out.println(" :" + sXmlPath.substring(pos1,posEnd) + ":" + xmlChild.getName() + "::" + sContent); 296 if(sContent == null) bError = true; //1 error force return null if !bInvalidIsOk 297 else sText.append(sContent); 298 } 299 else 300 { xmlChild = xmlChild.getChild(sXmlPath.substring(pos1, posEnd)); 301 if(xmlChild == null) bError = true; //1 error force return null if !bInvalidIsOk 302 else sText.append(xmlChild.getTextNormalize()); 303 } 304 } 305 pos1 = posEnd + 1; //after posConcat or > length() 306 } 307 } 308 if(bError && !bInvalidIsOk) return null; 309 else return sText.toString(); //may be "" if invalid path 310 } 311 312 313 /** Gets a list of children, not only from the given xml Element, but also from a deeper level. 
314 * This method is useable conveniently if the path is given in textual form 315 * outside the directly programming in java, but at example with control files (may be in XML) 316 * or from a conversion of XML data to a java routine (code generation). 317 The Path, param sXmlPath, may be given in the followed form, 318 <br> 319 examples: 320 <ul><li><code>"element1/name"</code>: children of element1 with tag name "name"</li> 321 <li><code>"element1/*"</code>: all children of element1</li> 322 <li><code>"../name"</code>: all siblings with name</li> 323 <li><code>"../*"</code>: all siblings inclusive self</li> 324 <li><code>".."</code>: Only the parent of self</li> 325 <li><code>"../.."</code>: Only the parent of parent</li> 326 <li><code>"."</code>: Only self (it is the param xml itself)</li> 327 </ul> 328 If the path ends with <code>".."</code> or <code>"."</code>, the list contains only the addressed element, 329 it is not a list getted with <code>org.jdom.Element.getChildren()</code>, it is a simple LinkedList 330 with this one element. 331 <br> 332 But if the path ends with an named element or "*", the returned list is a list 333 getted with <code>org.jdom.Element.getChildren()</code>. 334 It means, that all operations may be applied to the list, including add of further siblings 335 using <code>thelist.listIterator().add(sibling);</code> 336 * 337 * @param xml The parent element. 338 * @param sXmlPath Path relativ from parent to the element from which the children are listed. 339 * If the path is "." or empty, 340 * @return List of children or null if any error in path. 
341 */ 342 343 @SuppressWarnings("unchecked") 344 public static List<org.jdom.Parent> getChildren(Element xml, String sXmlPath) 345 { int pos1 = 0; 346 boolean bPathFound = false; 347 while(pos1 < sXmlPath.length() && !bPathFound) 348 { int posSep = sXmlPath.indexOf('/', pos1); 349 if(posSep >=0) 350 { bPathFound = false; 351 if(sXmlPath.substring(pos1, posSep).equals("..")) 352 { if(xml != null) xml = xml.getParentElement(); 353 } 354 else if(sXmlPath.substring(pos1, posSep).equals(".")) 355 { //nothing, "." is the actual element 356 } 357 else 358 { //really a child 359 if(xml != null) xml = xml.getChild(sXmlPath.substring(pos1, posSep)); 360 } 361 pos1 = posSep +1; 362 } 363 else bPathFound = true; 364 } 365 List<org.jdom.Parent> listChildren; 366 if(xml != null) 367 { if(sXmlPath.substring(pos1).equals("..")) 368 { //the parent is the meant children 369 listChildren = new LinkedList<org.jdom.Parent>(); 370 listChildren.add(xml.getParent()); 371 } 372 else if(sXmlPath.substring(pos1).equals(".")) 373 { //the addressed element directly 374 listChildren = new LinkedList<org.jdom.Parent>(); 375 listChildren.add(xml); 376 } 377 else 378 { //jdom-like children list 379 listChildren = xml.getChildren(sXmlPath.substring(pos1)); 380 } 381 } 382 else{ listChildren = null; } 383 return listChildren; 384 } 385 386 387 388 389 390 private static int getMultiText(StringBuffer sText, Element xml, String sXmlPath, int pos1) 391 { //boolean bOk = true; 392 //{ 393 int posSepEnd = sXmlPath.indexOf('}', pos1); 394 int posSep1 = sXmlPath.indexOf('|', pos1); 395 int posSep2 = sXmlPath.indexOf('|', posSep1 +1); //may be 0 if posSep1 = -1 396 if(posSepEnd < 0 || posSep1 < 0 || posSep2 < 0) 397 { sText.append("::ERROR::Syntay {||}:" + sXmlPath.substring(pos1)); 398 //bOk = false; 399 } 400 else 401 { String sApath = sXmlPath.substring(pos1+1, posSep1); 402 String sVpath = sXmlPath.substring(posSep1+1, posSep2); 403 String sSeparator = sXmlPath.substring(posSep2+1, posSepEnd); 404 
//System.out.println("getMultiText:" + sApath + ":" + sVpath + ":" ); 405 List<org.jdom.Parent> listChildren = getChildren(xml, sApath); 406 Iterator<org.jdom.Parent> iterChildren = listChildren.iterator(); 407 boolean bFirst = true; 408 while(iterChildren.hasNext()) 409 { Element xmlChild = (Element)(iterChildren.next()); 410 //System.out.println(" -"+ xmlChild.getName()); 411 String sContent = getTextFromPath(xmlChild, sVpath, true); 412 if(bFirst) bFirst = false; 413 else sText.append(sSeparator); 414 sText.append(sContent); 415 } 416 } 417 if(posSepEnd >0) return posSepEnd +1; 418 else return sXmlPath.length(); //error: end not found 419 } 420 421 422 423 424 425 426 427 428 429 /** Sets the specified element with the given value. 430 The Path may be given in the followed form (example): "../element/@attribute" 431 @param xml Element within the content is setted 432 @param sXmlPath Path selects the child within xml, into the sContent is setted. 433 The path should be given in form "../../name/name/dst". 434 The specification ".." means the parent, like XPATH. 435 All childs are created, if they don't exist.<br/> 436 The last element of the path may be a special specification: 437 <table border=1> 438 <tr><th>form</th><th>Description</th></tr> 439 <tr><td>name</td><td>the child with the name</td></tr> 440 <tr><td>.</td><td>The element itself (sXmlPath="." is the input element itself)</td></tr> 441 <tr><td>@name</td><td>example: "tag/@name". Sets the content into the attribute name of the tag</td></tr> 442 <tr><td>=</td><td>example: "tag/tag2/=". Sets the content into the element tag2, 443 otherwise the content is appended to name2</td></tr> 444 <tr><td>!</td><td>example: "tag/tag2/!". Changes the name of the selected xml-Element tag2 to the sContent</td></tr> 445 </table> 446 @param sContent content set to the element, attribute or defines the name of the element. 447 @return true if succesfull, false if the element is not found or other error (if no effect). 
448 */ 449 public static boolean setTextToPath(Element xml, String sXmlPath, String sContent) 450 { boolean bOk = false; 451 if(sContent == null) sContent = "XXX"; 452 Element xmlChild = xml; 453 int pos1 = 0; 454 int posSep; 455 if(sXmlPath.length()>0 && sContent != null) 456 { boolean bCont = true; 457 do 458 { posSep = sXmlPath.indexOf('/', pos1); 459 if(posSep < 0) //the last part of the path: 460 { posSep = sXmlPath.length(); 461 bCont = false; 462 } 463 if(sXmlPath.substring(pos1, posSep).equals("..")) 464 { xmlChild = xmlChild.getParentElement(); 465 } 466 else if(sXmlPath.substring(pos1, posSep).equals(".")) 467 { //nothing, "." is the actual element 468 } 469 else if(sXmlPath.substring(pos1).startsWith("!")) 470 { //the rest is the new tag name, evaluate later. 471 bCont = false; 472 } 473 else if(sXmlPath.substring(pos1).startsWith("@")) 474 { //the rest is the atrribute name, evaluate later. 475 bCont = false; 476 } 477 else if(sXmlPath.substring(pos1).startsWith("=")) 478 { //delete the content of the element before setting new. 479 xmlChild.setText(""); //delete the textual content 480 } 481 else 482 { //really a child 483 Element xmlChildNew; 484 xmlChildNew = xmlChild.getChild(sXmlPath.substring(pos1, posSep)); 485 if(xmlChildNew == null) 486 { //no such child, create it: 487 xmlChild = new Element(sXmlPath.substring(pos1, posSep)); 488 } 489 else 490 { xmlChild = xmlChildNew; 491 } //it is it. 492 } 493 if(bCont) { pos1 = posSep +1; } 494 } while(bCont && xmlChild != null); 495 496 if(xmlChild != null) 497 { if(sXmlPath.charAt(pos1) == '@') 498 { xmlChild.setAttribute(sXmlPath.substring(pos1+1), sContent); 499 bOk = true; 500 } 501 else if(sXmlPath.charAt(pos1) == '!') 502 { xmlChild.setName(sContent); 503 bOk = true; 504 } 505 else 506 { xmlChild.addContent(sContent); 507 bOk = true; 508 } 509 } 510 } 511 return bOk; 512 } 513 514 515 516 /** Reads a xml file and convert it to a internal xml tree. This is a simple frame arround SAXBuilder. 
517 On any problem an exception is thrown. This may be a file-not-found or a parsing problem with the content of the xml-file. 518 519 @return The root element 520 */ 521 public static Element readXmlFile(File file) 522 throws XmlException 523 { try 524 { SAXBuilder builder = new SAXBuilder(); 525 Document doc = builder.build( file ); 526 return doc.getRootElement(); 527 } 528 catch(JDOMException exception) 529 { throw new XmlException("conversion readed xml-File " + file.getAbsolutePath() + exception.getMessage()); 530 } 531 catch(IOException exception) 532 { throw new XmlException("reading xml-File " + file.getAbsolutePath() + exception.getMessage()); 533 } 534 } 535 536 537 538 /** Reads a xml file and convert it to a internal xml tree. This is a more complex frame arround SAXBuilder. 539 * The special solution is the followed:<br> 540 * * By reading from file every newline is converted to a space char, except the first line. 541 * It means, the file have only 2 lines, the head line and a very long second line 542 * with the whole content.<br> 543 * * After reading all whitespaces are converted to one space using {@link replaceWhiteSpaceWith1Space(Element, boolean)}.<br> 544 * <br> 545 * The effect is the followed: The textual content from the inputted XML file 546 * doesn 't contain any whitespaces, or line feed. Every white space is converted to one spaces. 547 * More as one space in serial doesn't exist. No line structure is given. 548 * If a prior beautificated input is there, it is now non-beatificated, simple. 549 * This is a format appropriately useable for document processing with document formatting structures. 550 * <br> 551 * @param file The file from which xml is readed. 552 * @return The root element without whitespaces. 
553 */ 554 public static Element readXmlFileTrimWhiteSpace(File file) 555 throws XmlException 556 { 557 558 559 class InputStreamSpecial extends InputStream 560 { 561 private final InputStream in; 562 int cNext = -1; 563 int lineCt = 0; 564 565 InputStreamSpecial(File file) 566 { InputStream in; 567 try{ in = new FileInputStream(file);} 568 catch(FileNotFoundException ex){ in = null;} 569 this.in = in; 570 } 571 572 public int read() throws IOException 573 { 574 if(in != null) 575 { int cc; 576 if(cNext >0) 577 { cc = cNext; 578 } 579 else 580 { 581 } 582 cc = in.read(); 583 if(cc == 0x0d && lineCt >0) 584 { cc = 0x20; 585 } 586 if(cc == 0x0a) 587 { if(lineCt >0) //first line: no change! 588 { cc = 0x20; 589 } 590 lineCt +=1; 591 } 592 return cc; 593 } 594 else return -1; 595 } 596 597 public boolean isReadable(){ return in != null; } 598 }//InputStreamSpecial 599 600 //use the special reader inside this method: 601 InputStreamSpecial input = new InputStreamSpecial(file); 602 if(!input.isReadable()) 603 { throw new XmlException("file not found: " + file.getAbsolutePath()); 604 } 605 try 606 { SAXBuilder builder = new SAXBuilder(); 607 Document doc = builder.build( input ); 608 //Document doc = builder.build( file ); 609 Element root = doc.getRootElement(); 610 replaceWhiteSpaceWith1Space(root, false); 611 return root; 612 } 613 catch(Exception exception) 614 { throw new XmlException("conversion given xml-String " + exception.getMessage()); 615 } 616 } 617 618 619 /** Replaces white spaces of all text()-content with one space, by keeping 620 * the inner leading and trailing spaces. 621 * The original behaviour of XML outputter is the possibility of replacement 622 * of all whitespace with one space, but only in combination with trimming 623 * all leading and trailing spaces with no replacement with one spaces. 624 * This property sometimes is not useable, at example in a combination of 625 * "text <b>This is bold </b>The last space is a bold space." 
626 * The trimming ignores the space and the text would be corrupted. 627 * @param xml The element to be white-spaces-trimmed.<br/> 628 * @param bContendText Normally let it false by calling outside! 629 * If false, than test of containing text(), if it contains text() than 630 * delete the first leading and the last trailing whitespace 631 * and call recursively for inner elements with true.<br/> 632 * If true than normalize, but keep one space 633 * instead of possible leading or trainling white spaces. 634 * This is the correct choice for inner (recursively) calling.<br/> 635 */ 636 637 @SuppressWarnings("unchecked") 638 public static void replaceWhiteSpaceWith1Space(Element xml, boolean bContendText) 639 { boolean bNewContendText = false; 640 if(!bContendText) 641 { //it is a textual paragraph if the element is a <p>-tag. 642 String sTest = xml.getTextNormalize(); //get all text 643 if(sTest.length()>0) 644 { bContendText = true; 645 bNewContendText = true; 646 } 647 //bContendText = xml.getName().equals("p"); 648 } 649 { List listChild = xml.getContent(); 650 //List listChildNew = new LinkedList(); 651 Iterator iterChild = listChild.iterator(); 652 bNewContendText = false; //test: what is with preserving the first and last space anyway 653 654 boolean bFirst = bNewContendText; //only on first element of first Text 655 while(iterChild.hasNext()) 656 { Content xmlChild = (Content)iterChild.next(); 657 //xmlChild.detach(); 658 if(xmlChild instanceof org.jdom.Text && bContendText) 659 { org.jdom.Text text = ((org.jdom.Text)(xmlChild)); 660 String sContent = text.getText(); 661 int nChars = sContent.length(); 662 if(nChars>0) 663 { char char1 = sContent.charAt(0); 664 char char9 = sContent.charAt(nChars-1); 665 //:NOTE: \r\n is changed to \n on input parser. 666 sContent = org.jdom.Text.normalizeString(sContent); 667 //without leading and trailing spaces, only one space instead whitespace. 
668 if(sContent.startsWith("dieses hier")) 669 { 670 sContent = "dieses hier "; 671 } 672 if(char1 == ' ' || char1 == '\n' || char9 == ' ' || char9 == '\n' ) 673 { //only of any reason to change, fill sContent new. 674 sContent = ( (!bFirst && char1 == ' ') ? " " : "") 675 + ( (!bFirst && char1 == '\n') ? " " : "") 676 + sContent 677 + (( ( iterChild.hasNext() //not the last element 678 || !bNewContendText //but always on inner elements 679 ) 680 &&(nChars >1) //input at least 2 chars 681 && sContent.length()>0 //any content else 682 && (char9 == ' '|| char1 == '\n') //last char is white space 683 ) 684 ? " " //than append space. 685 : "" 686 ) 687 ; 688 } 689 } 690 text.setText(sContent); 691 //listChildNew.add(new org.jdom.Text(sContent)); 692 } 693 else if(xmlChild instanceof org.jdom.Element) 694 { //process it recursively 695 replaceWhiteSpaceWith1Space((org.jdom.Element)xmlChild, bContendText); 696 //listChildNew.add(xmlChild); 697 } 698 else 699 { //listChildNew.add(xmlChild); //unchanged. 700 } 701 bFirst = false; 702 } 703 } 704 } 705 706 707 708 /**Beautificates the content of the Element with respecting of textual content preserving. 709 * Copies the Element content inclusive childs into a new Element and returns the new Element. 710 * If an element (child element) contents text(), than the content is copied without changes, 711 * like xml:space="preserve". 712 * <br> 713 * If no text() is containing, all empty (only whitespace)-text()- 714 * elements are ignored, and between elements, linefeed and indent is added. 715 * So the output written with org.jdom.output.Format.getRawFormat() is beautificated. 716 * This method is called recursively if child Elements are present. 
* <br>
   * Example: if the input xml tree contains:
   * <pre>&lt;sampleTag&gt;&lt;innerTag&gt;&lt;p&gt;This is text&lt;b&gt; with bold &lt;/b&gt;and i&lt;b&gt;nn&lt;/b&gt;er bold text&lt;/p&gt;&lt;/innerTag&gt;&lt;/sampleTag&gt;
   * </pre>
   * The output will be:<pre>
   * &lt;sampleTag&gt;
   *   &lt;innerTag&gt;
   *     &lt;p&gt;This is text&lt;b&gt; with bold &lt;/b&gt;and i&lt;b&gt;nn&lt;/b&gt;er bold text&lt;/p&gt;
   *   &lt;/innerTag&gt;
   * &lt;/sampleTag&gt;
   * </pre>
   * <br>
   * Another way to get beautified output may be using the formatting possibilities of JDOM,
   * but they take no consideration of inner textual elements. If beautification is chosen,
   * all elements are beautified and the original space structure may be corrupted.
   * The above example will be written in form
   * <pre>
   * &lt;sampleTag&gt;
   *   &lt;innerTag&gt;
   *     &lt;p&gt;This is text
   *       &lt;b&gt; with bold
   *       &lt;/b&gt;
   *       and i
   *       &lt;b&gt;nn
   *       &lt;/b&gt;
   *       er bold text
   *     &lt;/p&gt;
   *   &lt;/innerTag&gt;
   * &lt;/sampleTag&gt;
   * </pre>
   * In this sample always a whitespace is produced between elements in output,
   * even if the user will not have a space.
   * <br>
   * Note: The indentation is limited to approximately a half page width (length of sIndent).
   * <br>
   * Note: other content than Element and Text is not supported yet (:TODO:), it is not copied.
   *
   *
   * @param xml The Input element tree.
   * @return A new XML tree with beautified content.
   */
  public static Element beautificationBewareTextContent(Element xml)
  { //first call
    return BeautificationNoTextContent.beautificationNoTextContent(xml, false, 0);
  }

  /** Inner class for Beautification */
  private static class BeautificationNoTextContent
  {

    /** Maximal number of indentation levels.
     * NOTE(review): In the reviewed text the indent string was given as a literal with only one space,
     * from which a negative maximum was calculated, so that the first child element forces
     * a StringIndexOutOfBoundsException. 20 levels with 2 spaces are an assumption
     * which matches the documented 'approximately a half page width' - TODO confirm.
     */
    static final int nMaxIndent = 20;

    /** constant String to realize indent: a line feed followed by 2 spaces per level.*/
    static final String sIndent = createIndent(nMaxIndent);

    /** Builds the indent string for the given number of levels. */
    private static String createIndent(int nLevels)
    { StringBuffer sBuffer = new StringBuffer(1 + 2*nLevels);
      sBuffer.append('\n');
      for(int ii = 0; ii < 2*nLevels; ++ii){ sBuffer.append(' '); }
      return sBuffer.toString();
    }


    /** Inner recursively called variant of the public method.
     *
     * @param xml Input Element
     * @param bContendText True, then text()-content is present in xml Input or parents.
     * @param nLevel level of indentation, counted by every recursive call.
     * @return a new detached Element with copied attributes, namespace declarations,
     *         text and child elements.
     */
    @SuppressWarnings("unchecked")
    private static Element beautificationNoTextContent(Element xml, boolean bContendText, int nLevel)
    { Element xmlOut = new Element(xml.getName(), xml.getNamespace());

      { //copy Attributes. The clone keeps the namespace and type of the attribute,
        //a copy of name and value only would lose the prefix for example of xml:space.
        Iterator iter = xml.getAttributes().iterator();
        while(iter.hasNext())
        { Attribute attrib = (Attribute)(iter.next());
          xmlOut.setAttribute((Attribute)attrib.clone());
        }
      }

      { //copy AdditionalNamespaces
        Iterator iter = xml.getAdditionalNamespaces().iterator();
        while(iter.hasNext())
        { Namespace ns = (Namespace)(iter.next());
          xmlOut.addNamespaceDeclaration(ns);
        }
      }

      if(nLevel > nMaxIndent ){ nLevel = nMaxIndent; } //no further indent.

      Iterator iterChild = xml.getContent().iterator();
      while(iterChild.hasNext())
      { Object xmlChild = iterChild.next();
        //:NOTE: a detach of the child here causes an Iterator exception!
        if(xmlChild instanceof org.jdom.Text)
        { String sContent = ((org.jdom.Text)(xmlChild)).getText();
          if(!bContendText)
          { //not textual content before: it is textual content now if any non-white-space char is found.
            //The state is valid for all following siblings and all children too.
            int posStart = 0;
            final int posEnd = sContent.length();
            while(!bContendText && posStart < posEnd)
            { if("\r\n \t".indexOf(sContent.charAt(posStart)) >=0){ posStart+=1; }
              else { bContendText = true; }
            }
          }
          if(bContendText)
          { //really a text or textual content detected before: copy exactly.
            xmlOut.addContent(sContent);
          }
          //else: ignore the text, if no textual content before and the text is empty.
          //It is a beautification from input. The beautification will be added in an own kind.
        }
        else if(xmlChild instanceof Element)
        { //other element:
          if(!bContendText)
          { //insert line feed and indent before:
            xmlOut.addContent(sIndent.substring(0, 1+2*nLevel));
          }
          //call recursively this method, after it add the result element.
          xmlOut.addContent(beautificationNoTextContent((Element)xmlChild, bContendText, nLevel+1));
        }
      }
      //:NOTE: no line feed and indent is inserted before the end tag of the element.
      return xmlOut;
    }
  }//class BeautificationNoTextContent

  /** Writes the name of the xml-node in the report, with the report level info.
   * @param xml The element to report.
   * @param report The destination of the report line.
   */
  public static void reportContentElement(Element xml, Report report)
  {
    report.reportln(Report.info, "reportContentElement: " + xml.getName());
  }


  /**Transforms a xml tree to a new tree. The input tree is started from a detached xml Element.
     @param xmlOutResult instance of Result to accumulate the output.
879 */ 880 private static void xslTransform(Element xmlInput, File fXsl, Result xmlOutResult) 881 throws XmlException 882 { 883 if(!fXsl.exists()) 884 { throw new XmlException("xslTransformation: xsl-file not found: " + fXsl.getName()); 885 } 886 Transformer xslTransformer = null; 887 try 888 { 889 xslTransformer = TransformerFactory.newInstance().newTransformer(new StreamSource(fXsl)); 890 } 891 catch (TransformerException exception) 892 { throw new XmlException("xslTransformation: error in xsl-file: " + fXsl.getName() + exception.getMessage()); 893 } 894 Document docIn = new Document(); 895 xmlInput.detach(); //it may have used in a transformation before. 896 docIn.setRootElement(xmlInput); 897 try{ xslTransformer.transform(new JDOMSource(docIn), xmlOutResult); } 898 catch (TransformerException exception) 899 { throw new XmlException("xslTransformation: error in xsl-file: " + fXsl.getName() + exception.getMessage()); 900 } 901 902 } 903 904 905 906 /**XSL-Transformation of a xml tree to a new tree. The input tree is started from an detached xml Element. 907 * Internally javax.xml is used. 908 @return The new xml tree. The returned root element has not a parent, it is detached. 909 So it can be added to any other tree. 910 */ 911 public static Element xslTransformXml(Element xmlInput, File xslFile) 912 throws XmlException 913 { 914 JDOMResult xmlOutResult = new JDOMResult(); 915 xslTransform(xmlInput, xslFile, xmlOutResult); 916 if(!xmlOutResult.getDocument().hasRootElement()) 917 { throw new XmlException("xslTransformationXml: no root element produced"); 918 } 919 //if success than return the detached root element from conversion document. 920 //The document is further unnecessary and will be deleted by the garbage collector. 921 Element xmlOut = xmlOutResult.getDocument().getRootElement(); 922 xmlOut.detach(); 923 return xmlOut; 924 } 925 926 927 928 929 /**Transform a xml tree to a string. The input tree is started from an detached xml Element. 
930 @return the string. 931 */ 932 public static String xslTransformString(Element xmlInput, File xslFile) 933 throws XmlException 934 { 935 ByteArrayOutputStream outputStream = new ByteArrayOutputStream(10000); //increased if necessary 936 Result xmlOutResult = new StreamResult(outputStream); 937 938 xslTransform(xmlInput, xslFile, xmlOutResult); 939 return outputStream.toString(); 940 } 941 942 943 /**Output a xml tree to a file with beautification of the output, 944 * but beware all spaces inside textual content. It calls internally {@link beautificationNoTextContent(Element)} 945 * <br> 946 * @param xmlRoot The root element 947 * @param fileOut The file to write out. The file will be created or replaced. 948 * @throws FileNotFoundException If the fileOut doesn't match. 949 */ 950 public static void writeXmlBeautificatedTextFile(Element xmlRoot, File fileOut, Charset encoding) 951 throws XmlException, FileNotFoundException 952 { Element xmlRootBeautificated = beautificationBewareTextContent(xmlRoot); 953 XmlMode mode = new XmlMode(); 954 mode.setEncoding(encoding); 955 mode.setIndent(null); 956 writeXmlFile(xmlRootBeautificated, fileOut, mode); 957 } 958 959 960 961 /**Output a xml tree to a file. 962 * @throws FileNotFoundException if the sFileOut doesn't match 963 * @deprecated 964 */ 965 public static void writeXmlFile(Element xmlRoot, String sFileOut) 966 throws XmlException, FileNotFoundException 967 { XmlMode mode = new XmlMode(); 968 writeXmlFile(xmlRoot, new File(sFileOut), mode); 969 } 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 /**Output a xml tree to a file. 986 * if setIndent() is called with null-Argument, than write without wrapping. 987 * Otherwise, wrap with the given indent. 988 * If the output is written with indent, it is revitalize readable. Some times, the text should be written 989 * with exactly spaces, not with white spaces, from there setIndent(null) should be called before. 
990 * The output will be written with the charset defined by {@link create(Charset)} or {@link setEncoding(int)}. 991 * @param xmlRoot The root Element of the created output XML-File. 992 * @param fileOut This file will be created or replaced. 993 * @exception FileNotFoundException if the file exists but is a directory 994 * rather than a regular file, does not exist but cannot 995 * be created, or cannot be opened for any other reason 996 * @throws IOException if any error on writing at file system 997 * @exception SecurityException if a security manager exists and its 998 * <code>checkWrite</code> method denies write access 999 * to the file. 1000 */ 1001 public void writeXmlFile(Element xmlRoot, File fileOut) 1002 throws XmlException, FileNotFoundException 1003 { FileOutputStream fOut = null; 1004 //try 1005 { fOut = new FileOutputStream(fileOut); 1006 Document docu = new Document(); 1007 docu.setRootElement(xmlRoot); 1008 XMLOutputter writerXml = new XMLOutputter(); 1009 org.jdom.output.Format format = org.jdom.output.Format.getRawFormat(); 1010 //format.setNewlines(bIndent); 1011 //xmlFormat.setIndent(" "); 1012 //format.setIndent(" "); 1013 //format.setLineSeparator("\n"); 1014 //format.setExpandEmptyElements(true); 1015 writerXml.setFormat(format); 1016 try 1017 { writerXml.output( docu, fOut ); 1018 fOut.close(); 1019 } 1020 catch(IOException exc) 1021 { throw new XmlException("Any error writing file:" + exc.getMessage()); 1022 } 1023 } 1024 //catch(IOException exception){ throw new XmlException("write xml-output-file: " + fileOut.getAbsolutePath()); } 1025 } 1026 1027 1028 1029 /**Output a xml tree to a file. 1030 * @param xmlRoot The root Element of the created output XML-File. 1031 * @param fileOut This file will be created or replaced. 
1032 * @param sEncoding The encoding, typicall "ISO-8859-1" 1033 * @exception FileNotFoundException if the file exists but is a directory 1034 * rather than a regular file, does not exist but cannot 1035 * be created, or cannot be opened for any other reason 1036 * @throws IOException if any error on writing at file system 1037 * @exception SecurityException if a security manager exists and its 1038 * <code>checkWrite</code> method denies write access 1039 * to the file. 1040 */ 1041 public static void writeXmlFile(Element xmlRoot, File fileOut, XmlMode mode) 1042 throws XmlException, FileNotFoundException 1043 { FileOutputStream fOut = null; 1044 //try 1045 { fOut = new FileOutputStream(fileOut); 1046 Document docu = new Document(); 1047 docu.setRootElement(xmlRoot); 1048 XMLOutputter writerXml = new XMLOutputter(); 1049 org.jdom.output.Format format = org.jdom.output.Format.getRawFormat(); 1050 format.setEncoding(mode.getEncoding()); 1051 //format.setExpandEmptyElements(true); 1052 writerXml.setFormat(format); 1053 try 1054 { writerXml.output( docu, fOut ); 1055 fOut.close(); 1056 } 1057 catch(IOException exc) 1058 { throw new XmlException("Any error writing file:" + exc.getMessage()); 1059 } 1060 } 1061 //catch(IOException exception){ throw new XmlException("write xml-output-file: " + fileOut.getAbsolutePath()); } 1062 } 1063 1064 1065 1066 1067 /**Output a xml tree to a file. 1068 * @param xmlRoot The root Element of the created output XML-File. 1069 * @param fileOut This file will be created or replaced. 
1070 * @param sEncoding The encoding, typicall "ISO-8859-1" 1071 * @exception FileNotFoundException if the file exists but is a directory 1072 * rather than a regular file, does not exist but cannot 1073 * be created, or cannot be opened for any other reason 1074 * @throws IOException if any error on writing at file system 1075 * @exception SecurityException if a security manager exists and its 1076 * <code>checkWrite</code> method denies write access 1077 * to the file. 1078 */ 1079 public static void writeXmlDirect(Element xml, File fileOut, String sEncoding) 1080 throws XmlException, FileNotFoundException 1081 { FileWriter out = null; 1082 //try 1083 { try 1084 { 1085 out = new FileWriter(fileOut); 1086 writeXmlDirect(xml, out, sEncoding); 1087 } catch (IOException e) 1088 { 1089 // TODO Auto-generated catch block 1090 e.printStackTrace(); 1091 } 1092 } 1093 } 1094 /**Output a xml tree to a file. 1095 * @param xmlRoot The root Element of the created output XML-File. 1096 * @param fileOut This file will be created or replaced. 1097 * @param sEncoding The encoding, typicall "ISO-8859-1" 1098 * @exception FileNotFoundException if the file exists but is a directory 1099 * rather than a regular file, does not exist but cannot 1100 * be created, or cannot be opened for any other reason 1101 * @throws IOException if any error on writing at file system 1102 * @exception SecurityException if a security manager exists and its 1103 * <code>checkWrite</code> method denies write access 1104 * to the file. 
* <br>
   * The element is written as start tag with its attributes and its additional namespace declarations,
   * after them the content and the end tag. The characters &amp;, &lt; and &gt; in texts and additionally
   * the quotation mark in attribute values are written as their XML entities.
   * The names are written without namespace prefix.
   */
  public static void writeXmlDirect(Element xml, Writer out, String sEncoding)
  throws XmlException, IOException
  { //start tag with attributes:
    out.write("<" + xml.getName());
    for(Object item : xml.getAttributes())
    { Attribute attrib = (Attribute)item;
      out.write(" " + attrib.getName() + "=" + "\"" + escapeForXmlDirect(attrib.getValue(), true) + "\"");
    }

    //AdditionalNamespaces: They should be written inside the start tag, before the closing '>'.
    for(Object item : xml.getAdditionalNamespaces())
    { Namespace ns = (Namespace)item;
      String sPrefix = ns.getPrefix();
      String sNsAttrib = sPrefix.length() == 0 ? " xmlns" : " xmlns:" + sPrefix;
      out.write(sNsAttrib + "=\"" + escapeForXmlDirect(ns.getURI(), true) + "\"");
    }
    out.write(">");

    //content:
    for(Object xmlChild : xml.getContent())
    { if(xmlChild instanceof org.jdom.Text)
      { String sContent = ((org.jdom.Text)xmlChild).getText();
        out.write(escapeForXmlDirect(sContent, false));
      }
      else if(xmlChild instanceof Element)
      { //other element:
        //call recursively this method to write the child with its content.
        writeXmlDirect((Element)xmlChild, out, sEncoding);
      }
      //other content (comments etc.) is not written.
    }

    //end tag, elsewhere the output is not well formed.
    out.write("</" + xml.getName() + ">");
  }


  /**Replaces the characters which have a special meaning in XML by their entities.
   * @param sText The text or attribute value to convert.
   * @param bAttribute true than the quotation mark is replaced too, necessary inside attribute values.
   * @return The text which can be written between tags or between the quotation marks of an attribute.
   */
  private static String escapeForXmlDirect(String sText, boolean bAttribute)
  { StringBuilder sb = new StringBuilder(sText.length() + 16);
    for(int ix = 0; ix < sText.length(); ++ix)
    { char cc = sText.charAt(ix);
      switch(cc)
      { case '&': sb.append("&amp;"); break;
        case '<': sb.append("&lt;"); break;
        case '>': sb.append("&gt;"); break;
        case '"': if(bAttribute){ sb.append("&quot;"); } else { sb.append(cc); } break;
        default: sb.append(cc);
      }
    }
    return sb.toString();
  }



}
