Source code

001/****************************************************************************/
002/* Copyright/Copyleft:
003 *
004 * For this source the LGPL Lesser General Public License,
005 * published by the Free Software Foundation is valid.
006 * It means:
007 * 1) You can use this source without any restriction for any desired purpose.
008 * 2) You can redistribute copies of this source to everybody.
009 * 3) Every user of this source, also the user of redistribute copies
010 *    with or without payment, must accept this license for further using.
011 * 4) But the LPGL ist not appropriate for a whole software product,
012 *    if this source is only a part of them. It means, the user
013 *    must publish this part of source,
014 *    but don't need to publish the whole source of the own product.
015 * 5) You can study and modify (improve) this source
016 *    for own using or for redistribution, but you have to license the
017 *    modified sources likewise under this LGPL Lesser General Public License.
018 *    You mustn't delete this Copyright/Copyleft inscription in this source file.
019 *
020 * @author www.vishia.de/Java
021 * @version 2006-06-15  (year-month-day)
022 * list of changes:
023 * 2006-05-00: www.vishia.de creation
024 *
025 ****************************************************************************/
026
027package org.vishia.xml;
028
029
030import java.util.*;
031import java.io.*;
032import java.nio.charset.Charset;
033
034import org.jdom.*;
035//import org.jdom.input.*;
036import org.jdom.input.SAXBuilder;
037import org.jdom.output.*;
038import org.jdom.transform.*;          //JDOMInput, JdomOutput, Transformer contens not here!
039import javax.xml.transform.*;         //Transformer
040import javax.xml.transform.stream.*;  //StreamSource
041
042import org.vishia.mainCmd.Report;
043import org.vishia.xmlSimple.XmlException;
044
045/** The class XmlExtensions contains several methods to deal with XML data.
046 * It uses JDOM as basic access to XML. The JDOM library downloadable from www.jdom.org is necessary.
047 * <br>
048 * They are three groups of tasks of this class:<br>
049 * <ul><li>Access to inner parts of an element tree: This feature is implemented in the methods
050 *   {@link getChildren(Element, String)}, {@link getTextFromPath(Element, String, boolean)}
051 *   and {@link setTextToPath(Element, String, String )}. With this methods a textual controlled access 
052 *   to any content inside Elements is possible, without calling a XSL-translator, but directly.
053 *   The textual given pathes are comparably to XPATH, but not equivalent.</li>
054 * <li>Read and write whole XML trees from files and to files. The beautification and handling of
055 *   white spaces are topics of the appropriate methods read...() and write...().</li>
056 * <li>Wrapper to call the XSL translator {@link xslTransformString(Element, File)} and {@link xslTransformXml(Element, File)}.
057 *   The cause of this wrappers are catch of exceptions and supply detached xml elements (not bind to a document)
058 *   as result of XSL translation.</li>
059 * </ul>
060 * <br>
061 * The beautification and white space processing follows the necessities of document text processing.
062 * It is an inherent problem of XML that spaces and new lines are determined both by the output format (beautification)
063 * and by the content, and the two are not distinguishable. Therefore, in some XML formats no beautification is made,
064 * but such XML files are hard to read for humans. In document texts, however, no linefeed should be present because linefeeds
065 * are not parts of the information itself, they are parts of the appearance of the information at printing.
066 * The same applies to spaces: Only one space in series may be a part of the information itself. Further white spaces are not.
067 * <br>
068 * Therefore any run of white spaces in the input may be reduced to one space after reading from file. The routine
069 * {@link #replaceWhiteSpaceWith1Space(Element, boolean)} implements this feature. The reduced input xml tree may be processed
070 * by any XSL translation or other XML processing in Java. The output may be beautified for better readability by humans,
071 * but the spaces of text content must be preserved. The beautification is applied only to the superior organisation elements.
072 * The methods {@link beautificationBewareTextContent(Element)} and {@link writeXmlBeautificatedTextFile(Element, File)} do so.
073 * <br>
074 * <br>
075 * Most of the methods are static, because no internal data are necessary. The output methods
076 * uses settings of some properties: encoding, indent mode, so they are based on a instance of XmlExtensions.
077 * A instance of XmlExtensions will be created by the static methods {@link create(Charset)},
078 * the constructors are private. This is a design attempt ajar to factory patterns. 
079 * The using of <code>new XmlExtension(...)</code> instead <code>create(...)</code> will be 
080 * adequate, but here create() is the decision.  
081 *
082<hr/>
083<pre>
084date       who        change
0852007-01-02 JcHartmut  create() with Charset, some methods will be non static.
0862006-03-07 JcHartmut  beautification, replaceWhiteSpaceWith1Space.
0872005-06-00 JcHartmut  initial revision
088*
089</pre>
090<hr/>
091 */
092public class XmlExtensions
093{
094  /** The data held in the subclass.*/
095  //private XmlMode mode;
096
097  /** The standard charset of this instance.*/
098  //private static String sEncodingStandard = "ISO-8859-1";
099
100  /** The xhtml namespace with prefix xhtml: xmlns:xhtml="http://www.w3.org/1999/xhtml" */
101  //public final static Namespace xhtml = Namespace.getNamespace("xhtml","http://www.w3.org/1999/xhtml");
102  
103  /** Class hold some properties of transformation.  
104   */
105  public static class XmlMode
106  {
107    private static final int kConvertText = 1;
108
109    /** If this bit is setted in convertMode, a XML-output,
110     * not textual output ist produced.
111     */
112    private static final int mConvertXml        = 0x10;
113
114    /** The XML-output-conversion should be written in UFT8-Form.
115     * @deprecated
116     * */
117    private static final int kConvertXmlUTF8    = mConvertXml + 0x0;
118
119    /** The XML-output-conversion should be written in ISO8859-Form.
120     * @deprecated
121     * */
122    private static final int kConvertXmlIso8859 = mConvertXml + 0x1;
123
124    /** The XML-output-conversion should be written in ISO8859-Form. 
125     * @deprecated
126     * */
127    private static final int kConvertXmlASCII = mConvertXml + 0x2;
128
129    /** The encoding charset of this instance.*/
130    private String sEncoding = "ISO-8859-1";
131    
132
133    private int convertMode = kConvertText;
134
135    /** The indent, may be "" or null if no wrapping. */
136    private String sIndent = "  ";
137
138    /**
139     * Sets the indent for wrapping.
140     * @param sIndent null if no wrapping, "" if wrapping without indent.
141     */
142    public void setIndent(String sIndent){ this.sIndent = sIndent; }
143
144    public void setText()      { convertMode = kConvertText; }
145    
146    /**Sets the encoding to the given encoding.
147     */
148    public void setEncoding(Charset encoding)   
149    { this.sEncoding=encoding.name(); 
150    }
151   
152    /**Sets the encoding to UTF8. UTF8 is a standard supporting all char sets.
153     */
154    public void setXmlUTF8()   { convertMode = kConvertXmlUTF8; sEncoding="UTF-8"; }
155   
156    
157    /**Sets the encoding to ISO-8849-1. This is the standard char set on windows systems in west european.
158     */
159    public void setXmlIso8859(){ convertMode = kConvertXmlIso8859; sEncoding = "ISO-8859-1";}
160    
161    /**Sets the encoding to US-ASCII. This is the minimal standard char set, using only 7-bit-chars.
162     */
163    public void setXmlASCII()  { convertMode = kConvertXmlASCII; sEncoding = "US-ASCII"; }
164
165    public boolean isText(){ return convertMode == kConvertText; }
166    public boolean isXml(){ return (convertMode & mConvertXml) == mConvertXml; }
167    public boolean isIndent(){ return sIndent != null; }
168    public String getIndent(){ return sIndent; }
169
170    /** Returns the encoding as input for XmlExtensions.writeXmlFile(xmlTree, sFileName, <b>encoding<b>)
171     * @deprecated
172    */
173    public int xxxgetEncoding()
174    throws XmlException
175    { switch(convertMode)
176      { case kConvertXmlUTF8   : return XmlExtensions.kEncoding_UTF_8;
177        case kConvertXmlIso8859: return XmlExtensions.kEncoding_ISO_8859_1;
178        default                : throw new XmlException("getEncoding? - but unknown"); //, Report.exitWithArgumentError);
179      }
180    }
181    
182    public String getEncoding()
183    { return sEncoding;
184    }
185  }
186
187
188
189  /** Internal constant for encoding the xml file. 
190   * It was an idea to code the encoding not with a string, but with a constant.
191   * But this system is not extensible, the using of java.nio.charset.Charset 
192   * is the better decision. See {@link create(Charset)}.
193   * @deprecated
194   */
195  public final static int kEncoding_ISO_8859_1 = 1;
196  
197  /** Internal constant for encoding the xml file.  
198   * @deprecated */
199  public final static int kEncoding_UTF_8 = 2;
200  /** Internal constant for encoding the xml file. 
201   * @deprecated */
202  public final static int kEncoding_ASCII = 3;
203
204
205
206
207
208
209  /** Gets the text content from the adressed node relativ to the given Element.
210   *  This method is useable conveniently if the path is given in textual form 
211   *  outside the directly programming in java, but at example with control files (may be in XML)
212   *  or from a conversion of XML data to a java routine (code generation).  
213      <br>
214      The Path, param sXmlPath, may be given in the followed form, 
215      <br>
216      example: "../element/@attribute" or "@name+$+{Parameter|@name|$}" <br>
217      <table border="1"><tr><th>sample</th><th>meaning</th></tr>
218      <tr><td><code>name             </code></td><td>Normalized text content of the element</td></tr>
219      <tr><td><code>@name            </code></td><td>content of the attribute</td></tr>
220      <tr><td><code>name/child       </code></td><td>Normalized text content of the child element</td></tr>
221      <tr><td><code>name/@attr       </code></td><td>content of the attribute within the element</td></tr>
222      <tr><td><code>name/name/@attr  </code></td><td>The path can be in any deepness</td></tr>
223      <tr><td><code>../../name       </code></td><td>parent from parent</td></tr>
224      <tr><td><code>"text"           </code></td>
225        <td>the constant text between "". It may be a part of the path concating with other parts with +, 
226        but it is also possible to return a text directly, without using the input xml element. 
227        This choice is conveniently, because the behaviour may be determined outside of a java programming,
228        and no special case will be produced if only a simple text is expected.</td>
229      </tr>
230      <tr><td><code>path+"text"+path </code></td><td>concation between the texts from pathes</td></tr>
231      <tr><td><code>path+c+path      </code></td><td>1 char between + is a added separator, it is the same as <code>+"c"+</code></td></tr>
232      <tr><td><code>path+c           </code></td><td>1 char on end after +, same as here <code>+"."</code></td></tr>
233      <tr><td><code>{apath|vpath|c}  </code></td>
234        <td>concation of all textes from <code><i>vpath</i></code> from all elements found with <code><i>apath</i></code>, 
235        with <code><i>c</i></code> as separator between the concatenated result strings. <br>
236        With using this expression as part of sXmlPath it is possible to concatenate the content of
237        more as one elements without using of a extra programmed loop.
238        </td>
239      </tr>
240      </table>
241      @param xml The Element the path starts from.
242      @param sXmlPath The path expression see above.
243      @param bInvalidIsOk If true, than return "" on invalid path, if false than return null on invalid path.
244      @return The requested text content, "" or null if the path is not valid.
245  */
246  @SuppressWarnings("unchecked")
247  public static String getTextFromPath(Element xml, String sXmlPath, boolean bInvalidIsOk)
248  {
249    boolean bDebug = false; //sXmlPath.startsWith("..");
250    StringBuffer sText = new StringBuffer(100);
251    boolean bError = false;
252    Element xmlChild = xml;
253    if(bDebug) System.out.println(sXmlPath);
254    int pos1 = 0;
255    while(pos1 < sXmlPath.length())
256    { //concat
257      char cc;
258      if( (cc = sXmlPath.charAt(pos1)) == '\'' || cc == '\"')
259      {
260        int posEnd = sXmlPath.indexOf(cc, pos1+1);   //same char either "..." or '...'
261        if(posEnd < 0) posEnd = sXmlPath.length();
262        sText.append(sXmlPath.substring(pos1+1, posEnd));
263        pos1 = posEnd +1;
264        if(pos1 < sXmlPath.length())
265        { pos1 = sXmlPath.indexOf('+', pos1);     //search + as concation char
266          if(pos1 < 0) pos1 = sXmlPath.length();  //no further concation.
267        }
268      }
269      else if(pos1 < (sXmlPath.length()-1) && sXmlPath.charAt(pos1+1) == '+'
270             || pos1 == (sXmlPath.length()-1)  //the last char
271             )
272      { //$+  simple char before concation is a concation char
273        sText.append(sXmlPath.charAt(pos1));
274        pos1 += 2;     //may be pos1 >= length()
275      }
276      else if(sXmlPath.charAt(pos1) == '{') pos1 = getMultiText(sText, xml, sXmlPath, pos1);
277      else
278      { //text from path
279        int posEnd  = sXmlPath.indexOf('+', pos1);
280        if(posEnd < 0) posEnd = sXmlPath.length();  //no concation: end of string
281        int posSep = sXmlPath.lastIndexOf('/', posEnd-1);
282        if(posSep > 0)
283        { if(bDebug) System.out.println("  child:"+ sXmlPath.substring(pos1, posSep)+":");
284          List listChildren = getChildren(xml, sXmlPath.substring(pos1, pos1 + posSep));
285          xmlChild = (listChildren.size()>0 ? (Element)(listChildren.get(0)) : null);
286          pos1 = posSep +1;
287        }
288        else xmlChild = xml;
289
290        if(xmlChild == null) bError = true;   //1 error force return null if !bInvalidIsOk
291        else
292        {
293          if(sXmlPath.charAt(pos1) == '@')
294          { String sContent = xmlChild.getAttributeValue(sXmlPath.substring(pos1+1, posEnd));
295            if(bDebug) System.out.println("              :" + sXmlPath.substring(pos1,posEnd) + ":" + xmlChild.getName() + "::" + sContent);
296            if(sContent == null) bError = true;   //1 error force return null if !bInvalidIsOk
297            else sText.append(sContent);
298          }
299          else
300          { xmlChild = xmlChild.getChild(sXmlPath.substring(pos1, posEnd));
301            if(xmlChild == null) bError = true;   //1 error force return null if !bInvalidIsOk
302            else sText.append(xmlChild.getTextNormalize());
303          }
304        }
305        pos1 = posEnd + 1;  //after posConcat or > length()
306      }
307    }
308    if(bError && !bInvalidIsOk) return null;
309    else return sText.toString();  //may be "" if invalid path
310  }
311
312
313  /** Gets a list of children, not only from the given xml Element, but also from a deeper level.
314   *  This method is useable conveniently if the path is given in textual form 
315   *  outside the directly programming in java, but at example with control files (may be in XML)
316   *  or from a conversion of XML data to a java routine (code generation).  
317      The Path, param sXmlPath, may be given in the followed form, 
318      <br>
319      examples: 
320      <ul><li><code>"element1/name"</code>: children of element1 with tag name "name"</li>
321          <li><code>"element1/*"</code>: all children of element1</li>
322          <li><code>"../name"</code>: all siblings with name</li>
323          <li><code>"../*"</code>: all siblings inclusive self</li>
324          <li><code>".."</code>: Only the parent of self</li>
325          <li><code>"../.."</code>: Only the parent of parent</li>
326          <li><code>"."</code>: Only self (it is the param xml itself)</li>
327      </ul>
328      If the path ends with <code>".."</code> or <code>"."</code>, the list contains only the addressed element, 
329      it is not a list getted with <code>org.jdom.Element.getChildren()</code>, it is a simple LinkedList 
330      with this one element. 
331      <br>
332      But if the path ends with an named element or "*", the returned list is a list 
333      getted with <code>org.jdom.Element.getChildren()</code>.
334      It means, that all operations may be applied to the list, including add of further siblings
335      using <code>thelist.listIterator().add(sibling);</code>     
336   * 
337   * @param xml The parent element.
338   * @param sXmlPath Path relativ from parent to the element from which the children are listed.
339   *        If the path is "." or empty, 
340   * @return List of children or null if any error in path.
341   */ 
342
343  @SuppressWarnings("unchecked")
344  public static List<org.jdom.Parent> getChildren(Element xml, String sXmlPath)
345  { int pos1 = 0;
346    boolean bPathFound = false;
347    while(pos1 < sXmlPath.length() && !bPathFound)
348    { int posSep = sXmlPath.indexOf('/', pos1);
349      if(posSep >=0)
350      { bPathFound = false;
351        if(sXmlPath.substring(pos1, posSep).equals(".."))
352        { if(xml != null) xml = xml.getParentElement();
353        }
354        else if(sXmlPath.substring(pos1, posSep).equals("."))
355        { //nothing, "." is the actual element
356        }
357        else
358        { //really a child
359          if(xml != null) xml = xml.getChild(sXmlPath.substring(pos1, posSep));
360        }
361        pos1 = posSep +1;
362      }
363      else bPathFound = true;
364    }
365    List<org.jdom.Parent> listChildren;
366    if(xml != null) 
367    { if(sXmlPath.substring(pos1).equals(".."))
368      { //the parent is the meant children
369        listChildren = new LinkedList<org.jdom.Parent>();
370        listChildren.add(xml.getParent());
371      }
372      else if(sXmlPath.substring(pos1).equals("."))
373      { //the addressed element directly
374        listChildren = new LinkedList<org.jdom.Parent>();
375        listChildren.add(xml);
376      }
377      else
378      { //jdom-like children list
379        listChildren = xml.getChildren(sXmlPath.substring(pos1));
380      }
381    }
382    else{ listChildren = null; }
383    return listChildren;
384  }
385
386
387
388
389
390  private static int getMultiText(StringBuffer sText, Element xml, String sXmlPath, int pos1)
391  { //boolean bOk = true;
392    //{
393    int posSepEnd = sXmlPath.indexOf('}', pos1);
394    int posSep1 = sXmlPath.indexOf('|', pos1);
395    int posSep2 = sXmlPath.indexOf('|', posSep1 +1);   //may be 0 if posSep1 = -1
396    if(posSepEnd < 0 || posSep1 < 0 || posSep2 < 0)
397    { sText.append("::ERROR::Syntay {||}:" + sXmlPath.substring(pos1));
398      //bOk = false;
399    }
400    else
401    { String sApath     = sXmlPath.substring(pos1+1, posSep1);
402      String sVpath     = sXmlPath.substring(posSep1+1, posSep2);
403      String sSeparator = sXmlPath.substring(posSep2+1, posSepEnd);
404      //System.out.println("getMultiText:" + sApath + ":" + sVpath + ":" );
405      List<org.jdom.Parent> listChildren = getChildren(xml, sApath);
406      Iterator<org.jdom.Parent> iterChildren = listChildren.iterator();
407      boolean bFirst = true;
408      while(iterChildren.hasNext())
409      { Element xmlChild = (Element)(iterChildren.next());
410        //System.out.println("  -"+ xmlChild.getName());
411        String sContent  = getTextFromPath(xmlChild, sVpath, true);
412        if(bFirst) bFirst = false;
413        else sText.append(sSeparator);
414        sText.append(sContent);
415      }
416    }
417    if(posSepEnd >0) return posSepEnd +1;
418    else return sXmlPath.length();  //error: end not found
419  }
420
421
422
423
424
425
426
427
428
429  /** Sets the specified element with the given value.
430      The Path may be given in the followed form (example): "../element/@attribute"
431      @param xml Element within the content is setted
432      @param sXmlPath Path selects the child within xml, into the sContent is setted.
433             The path should be given in form "../../name/name/dst".
434             The specification ".." means the parent, like XPATH.
435             All childs are created, if they don't exist.<br/>
436             The last element of the path may be a special specification:
437             <table border=1>
438             <tr><th>form</th><th>Description</th></tr>
439             <tr><td>name</td><td>the child with the name</td></tr>
440             <tr><td>.</td><td>The element itself (sXmlPath="." is the input element itself)</td></tr>
441             <tr><td>@name</td><td>example: "tag/@name". Sets the content into the attribute name of the tag</td></tr>
442             <tr><td>=</td><td>example: "tag/tag2/=". Sets the content into the element tag2,
443                               otherwise the content is appended to name2</td></tr>
444             <tr><td>!</td><td>example: "tag/tag2/!". Changes the name of the selected xml-Element tag2 to the sContent</td></tr>
445             </table>
446      @param sContent content set to the element, attribute or defines the name of the element.
447      @return true if succesfull, false if the element is not found or other error (if no effect).
448  */
449  public static boolean setTextToPath(Element xml, String sXmlPath, String sContent)
450  { boolean bOk = false;
451    if(sContent == null) sContent = "XXX";
452    Element xmlChild = xml;
453    int pos1 = 0;
454    int posSep;
455    if(sXmlPath.length()>0 && sContent != null)
456    { boolean bCont = true;
457      do
458      { posSep = sXmlPath.indexOf('/', pos1);
459        if(posSep < 0) //the last part of the path:
460        { posSep = sXmlPath.length();
461          bCont = false;
462        }
463        if(sXmlPath.substring(pos1, posSep).equals(".."))
464        { xmlChild = xmlChild.getParentElement();
465        }
466        else if(sXmlPath.substring(pos1, posSep).equals("."))
467        { //nothing, "." is the actual element
468        }
469        else if(sXmlPath.substring(pos1).startsWith("!"))
470        { //the rest is the new tag name, evaluate later.
471          bCont = false;
472        }
473        else if(sXmlPath.substring(pos1).startsWith("@"))
474        { //the rest is the atrribute name, evaluate later.
475          bCont = false;
476        }
477        else if(sXmlPath.substring(pos1).startsWith("="))
478        { //delete the content of the element before setting new.
479          xmlChild.setText("");  //delete the textual content
480        }
481        else
482        { //really a child
483          Element xmlChildNew;
484          xmlChildNew = xmlChild.getChild(sXmlPath.substring(pos1, posSep));
485          if(xmlChildNew == null)
486          { //no such child, create it:
487            xmlChild = new Element(sXmlPath.substring(pos1, posSep));
488          }
489          else
490          { xmlChild = xmlChildNew;
491          } //it is it.
492        }
493        if(bCont) { pos1 = posSep +1; }
494      } while(bCont && xmlChild != null);
495
496      if(xmlChild != null)
497      { if(sXmlPath.charAt(pos1) == '@')
498        { xmlChild.setAttribute(sXmlPath.substring(pos1+1), sContent);
499          bOk = true;
500        }
501        else if(sXmlPath.charAt(pos1) == '!')
502        { xmlChild.setName(sContent);
503          bOk = true;
504        }
505        else
506        { xmlChild.addContent(sContent);
507          bOk = true;
508        }
509      }
510    }
511    return bOk;
512  }
513
514
515
516  /** Reads a xml file and convert it to a internal xml tree. This is a simple frame arround SAXBuilder.
517      On any problem an exception is thrown. This may be a file-not-found or a parsing problem with the content of the xml-file.
518
519    @return The root element
520  */
521  public static Element readXmlFile(File file)
522  throws XmlException
523  { try
524    { SAXBuilder builder = new SAXBuilder();
525      Document doc = builder.build( file );
526      return doc.getRootElement();
527    }
528    catch(JDOMException exception)
529    { throw new XmlException("conversion readed xml-File " + file.getAbsolutePath() + exception.getMessage());
530    }
531    catch(IOException exception)
532    { throw new XmlException("reading xml-File " + file.getAbsolutePath() + exception.getMessage());
533    }
534  }
535
536
537
538  /** Reads a xml file and convert it to a internal xml tree. This is a more complex frame arround SAXBuilder.
539   * The special solution is the followed:<br>
540   * * By reading from file every newline is converted to a space char, except the first line.
541   * It means, the file have only 2 lines, the head line and a very long second line
542   * with the whole content.<br>
543   * * After reading all whitespaces are converted to one space using {@link replaceWhiteSpaceWith1Space(Element, boolean)}.<br>
544   * <br>
545   * The effect is the followed: The textual content from the inputted XML file
546   * doesn  't contain any whitespaces, or line feed. Every white space is converted to one spaces.
547   * More as one space in serial doesn't exist. No line structure is given.
548   * If a prior beautificated input is there, it is now non-beatificated, simple.
549   * This is a format appropriately useable for document processing with document formatting structures.
550   * <br>
551   * @param file The file from which xml is readed.
552   * @return The root element without whitespaces.
553  */
554  public static Element readXmlFileTrimWhiteSpace(File file)
555  throws XmlException
556  {
557
558
559    class InputStreamSpecial extends InputStream
560    {
561      private final InputStream in;
562      int cNext = -1;
563      int lineCt = 0;
564
565      InputStreamSpecial(File file)
566      { InputStream in;
567        try{ in = new FileInputStream(file);}
568        catch(FileNotFoundException ex){ in = null;}
569        this.in = in;
570      }
571
572      public int read() throws IOException
573      {
574        if(in != null)
575        { int cc;
576          if(cNext >0)
577          { cc = cNext;
578          }
579          else
580          {
581          }
582          cc = in.read();
583          if(cc == 0x0d && lineCt >0)
584          { cc = 0x20;
585          }
586          if(cc == 0x0a)
587          { if(lineCt >0)  //first line: no change!
588            { cc = 0x20;
589            }
590            lineCt +=1;
591          }
592          return cc;
593        }
594        else return -1;
595      }
596
597      public boolean isReadable(){ return in != null; }
598    }//InputStreamSpecial
599
600    //use the special reader inside this method:
601    InputStreamSpecial input = new InputStreamSpecial(file);
602    if(!input.isReadable())
603    { throw new XmlException("file not found: " + file.getAbsolutePath());
604    }
605    try
606    { SAXBuilder builder = new SAXBuilder();
607      Document doc = builder.build( input );
608      //Document doc = builder.build( file );
609      Element root = doc.getRootElement();
610      replaceWhiteSpaceWith1Space(root, false);
611      return root;
612    }
613    catch(Exception exception)
614    { throw new XmlException("conversion given xml-String " + exception.getMessage());
615    }
616  }
617
618
619  /** Replaces white spaces of all text()-content with one space, by keeping
620   * the inner leading and trailing spaces.
621   * The original behaviour of XML outputter is the possibility of replacement
622   * of all whitespace with one space, but only in combination with trimming
623   * all leading and trailing spaces with no replacement with one spaces.
624   * This property sometimes is not useable, at example in a combination of
625   * "text &lt;b>This is bold &lt;/b>The last space is a bold space."
626   * The trimming ignores the space and the text would be corrupted.
627   * @param xml The element to be white-spaces-trimmed.<br/>
628   * @param bContendText Normally let it false by calling outside!
629   *        If false, than test of containing text(), if it contains text() than
630   *        delete the first leading and the last trailing whitespace
631   *        and call recursively for inner elements with true.<br/>
632   *        If true than normalize, but keep one space
633   *        instead of possible leading or trainling white spaces.
634   *        This is the correct choice for inner (recursively) calling.<br/>
635   */
636
637  @SuppressWarnings("unchecked")
638  public static void replaceWhiteSpaceWith1Space(Element xml, boolean bContendText)
639  { boolean bNewContendText = false;
640    if(!bContendText)
641    { //it is a textual paragraph if the element is a <p>-tag.
642      String sTest = xml.getTextNormalize();  //get all text
643      if(sTest.length()>0)
644      { bContendText = true;
645        bNewContendText = true;
646      }
647      //bContendText = xml.getName().equals("p");
648    }
649    { List listChild = xml.getContent();
650      //List listChildNew = new LinkedList();
651      Iterator iterChild = listChild.iterator();
652      bNewContendText = false;  //test: what is with preserving the first and last space anyway
653
654      boolean bFirst = bNewContendText;  //only on first element of first Text
655      while(iterChild.hasNext())
656      { Content xmlChild = (Content)iterChild.next();
657        //xmlChild.detach();
658        if(xmlChild instanceof org.jdom.Text && bContendText)
659        { org.jdom.Text text = ((org.jdom.Text)(xmlChild));
660          String sContent = text.getText();
661          int nChars = sContent.length();
662          if(nChars>0)
663          { char char1 = sContent.charAt(0);
664            char char9 = sContent.charAt(nChars-1);
665            //:NOTE: \r\n is changed to \n on input parser.
666            sContent = org.jdom.Text.normalizeString(sContent);
667            //without leading and trailing spaces, only one space instead whitespace.
668            if(sContent.startsWith("dieses hier"))
669            {
670              sContent = "dieses hier ";
671            }
672            if(char1 == ' ' || char1 == '\n' || char9 == ' ' || char9 == '\n' )
673            { //only of any reason to change, fill sContent new.
674              sContent = ( (!bFirst && char1 == ' ') ? " " : "")
675                       + ( (!bFirst && char1 == '\n') ? " " : "")
676                       + sContent
677                       + (( ( iterChild.hasNext()  //not the last element
678                            || !bNewContendText    //but always on inner elements
679                            )
680                          &&(nChars >1)          //input at least 2 chars
681                          && sContent.length()>0 //any content else
682                          && (char9 == ' '|| char1 == '\n') //last char is white space
683                          )
684                         ? " "                   //than append space.
685                         : ""
686                         )
687                       ;
688            }
689          }
690          text.setText(sContent);
691          //listChildNew.add(new org.jdom.Text(sContent));
692        }
693        else if(xmlChild instanceof org.jdom.Element)
694        { //process it recursively
695          replaceWhiteSpaceWith1Space((org.jdom.Element)xmlChild, bContendText);
696          //listChildNew.add(xmlChild);
697        }
698        else
699        { //listChildNew.add(xmlChild);  //unchanged.
700        }
701        bFirst = false;
702      }
703    }
704  }
705
706
707
708  /**Beautificates the content of the Element with respecting of textual content preserving.
709   * Copies the Element content inclusive childs into a new Element and returns the new Element.
710   * If an element (child element) contents text(), than the content is copied without changes,
711   * like xml:space="preserve".
712   * <br>
713   * If no text() is containing, all empty (only whitespace)-text()-
714   * elements are ignored, and between elements, linefeed and indent is added.
715   * So the output written with org.jdom.output.Format.getRawFormat() is beautificated.
716   * This method is called recursively if child Elements are present.
717   * <br>
718   * Example: if the input xml tree contains:
719   * <pre>&lt;sampleTag>&lt;innerTag>&lt;p>This is text&lt;b> with bold &lt;/b>and i&lt;b>nn&lt;/b>er bold text&lt;/p>&lt;/innerTag>&lt;/sampleTag>
720   * </pre>
721   * The output will be:<pre>
722   * &lt;sampleTag>
723   *   &lt;innerTag>
724   *     &lt;p>This is text&lt;b> with bold &lt;/b>and i&lt;b>nn&lt;/b>er bold text&lt;/p>
725   *   &lt;/innerTag>
726   * &lt;/sampleTag>
727   * </pre>
728   * <br>
729   * Another way to get beautificated output may be using the formatting possibilities of JDOM,
730   * but they take no consideration of inner textual elements. If beautification is choiced,
731   * all elements are beautificated and the original space structure may be corrupted.
732   * The above example will written in form
733   * <pre>
734   * &lt;sampleTag>
735   *   &lt;innerTag>
736   *     &lt;p>This is text
737   *       &lt;b> with bold
738   *       &lt;/b>
739   *       and i
740   *       &lt;b>nn
741   *       &lt;/b>
742   *       er bold text
743   *     &lt;/p>
744   *   &lt;/innerTag>
745   * &lt;/sampleTag>
746   * </pre>
747   * In this sample always a whitespace is produced between elements in output,
748   * even if the user will not have a space.
749   * <br>
750   * Note: The identation is limited to approximately a half page width (length of sIndent).
751   * <br>
752   * Note: other content as Element and Text are not supported yet (:TODO:).
753   *
754   *
755   * @param xml The Input element tree.
756   * @return A new XML tree with beautificated content.
757   */
758  public static Element beautificationBewareTextContent(Element xml)
759  { //first call
760    return BeautificationNoTextContent.beautificationNoTextContent(xml, false, 0);
761  }
762
763  /** Inner class for Beautification */
764  private static class BeautificationNoTextContent
765  {
766
767    /** constant String to realize indent.*/
768    static final String sIndent = "\n                                                               ";
769
770    /** Maximal number of indentation, determined by length of sIndent.*/
771    static final int nMaxIndent = sIndent.length()/2 -2;
772
773
774    /** Inner Recursively call variant of the public method.
775     *
776     * @param xml Input Element
777     * @param bContendText True, than text()-content is present in xml Input or parents.
778     * @param nLevel level of indentation, count by every recursively call.
779     * @return
780     */
781    @SuppressWarnings("unchecked")
782    private static Element beautificationNoTextContent(Element xml, boolean bContendText, int nLevel)
783    { Element xmlOut = new Element(xml.getName(), xml.getNamespace());
784
785      { //copy Attributes
786        { List attributes = xml.getAttributes();
787          Iterator iter = attributes.iterator();
788          while(iter.hasNext())
789          { Attribute attrib = ((Attribute)(iter.next()));
790            xmlOut.setAttribute(attrib.getName(), attrib.getValue());
791          }
792        }
793      }
794
795      { //copy AdditionalNamespaces
796        List listAddNs = xml.getAdditionalNamespaces();
797        if(listAddNs.size() >0)
798        { Iterator iter = listAddNs.iterator();
799          while(iter.hasNext())
800          { Namespace ns = (Namespace)(iter.next());
801            xmlOut.addNamespaceDeclaration(ns);
802          }
803        }
804      }
805
806      if(nLevel > nMaxIndent ){ nLevel = nMaxIndent; } //no further indent.
807      if(false && !bContendText)
808      { //test of containment of text(), than set bContendText, with effect also to childs.
809        String sTest = xml.getTextNormalize();  //get all text with trimmed spaces.
810        if(sTest.length()>0)
811        {  bContendText = true;
812        }
813        //bContendText = xml.getName().equals("p");  //older test
814      }
815      { List listChild = xml.getContent();
816        //List listChildNew = new LinkedList();
817        Iterator iterChild = listChild.iterator();
818        while(iterChild.hasNext())
819        { Content xmlChild = (Content)iterChild.next();
820          //xmlChild.detach();  //:NOTE: detach causes a Iterator exception!
821          if(xmlChild instanceof org.jdom.Text) // && bContendText)
822          { org.jdom.Text text = ((org.jdom.Text)(xmlChild));
823            if(bContendText)
824            { //if a textual content was always detected before, copy exactly.
825                String sContent = text.getText();
826              xmlOut.addContent(sContent);  //add it.
827            }
828            else
829            { //not textual content before:
830                String sContent = text.getText();
831                int posStart = 0; int posEnd = sContent.length();
832                while(!bContendText && posStart < posEnd)
833                { char cc = sContent.charAt(posStart);
834                        if("\r\n \t".indexOf(cc) >=0){ posStart+=1; }
835                        else { bContendText = true; }
836                }
837              if(bContendText)
838                { //really a text
839                  xmlOut.addContent(sContent); //.substring(posStart));  //add it.
840                }
841              else
842              { //ignore the text, if no textual content before and the text is empty. 
843                //It is a beautification from input.
844                //The beautification will be added in a own kind. 
845              }
846            }
847          }
848          else if(xmlChild instanceof Element)
849          { //other element:
850            if(!bContendText)
851            { //insert indent before:
852              xmlOut.addContent(sIndent.substring(0, 1+2*nLevel));
853            }
854            //call recursively this method, after it add the result element.
855            xmlOut.addContent(beautificationNoTextContent((Element)xmlChild, bContendText, nLevel+1));
856          }
857
858        }
859
860        if(false && !bContendText && nLevel >0)
861        { //insert line feed and indent after the element:
862          xmlOut.addContent(sIndent.substring(0, 1+2*(nLevel-1)));
863        }
864      }
865      return xmlOut;
866    }
867  }//class BeautificationNoTextContent
868
869  /** Write the content of the xml-node in the report
870  */
871  public static void reportContentElement(Element xml, Report report)
872  {
873    report.reportln(Report.info, "reportContentElement: " + xml.getName());
874  }
875
876
877  /**Transform a xml tree to a new tree. The input tree is started from an detached xml Element.
878     @param xmlOutResult instance of Result to accumalate the output.
879  */
880  private static void xslTransform(Element xmlInput, File fXsl, Result xmlOutResult)
881  throws XmlException
882  {
883    if(!fXsl.exists())
884    { throw new XmlException("xslTransformation: xsl-file not found: " + fXsl.getName());
885    }
886    Transformer xslTransformer = null;
887    try
888    {
889      xslTransformer = TransformerFactory.newInstance().newTransformer(new StreamSource(fXsl));
890    }
891    catch (TransformerException exception)
892    { throw new XmlException("xslTransformation: error in xsl-file: " + fXsl.getName() + exception.getMessage());
893    }
894    Document docIn = new Document();
895    xmlInput.detach(); //it may have used in a transformation before.
896    docIn.setRootElement(xmlInput);
897    try{ xslTransformer.transform(new JDOMSource(docIn), xmlOutResult); }
898    catch (TransformerException exception)
899    { throw new XmlException("xslTransformation: error in xsl-file: " + fXsl.getName() + exception.getMessage());
900    }
901
902  }
903
904
905
906  /**XSL-Transformation of a xml tree to a new tree. The input tree is started from an detached xml Element.
907   * Internally javax.xml is used.
908     @return The new xml tree. The returned root element has not a parent, it is detached.
909             So it can be added to any other tree.
910  */
911  public static Element xslTransformXml(Element xmlInput, File xslFile)
912  throws XmlException
913  {
914    JDOMResult xmlOutResult = new JDOMResult();
915    xslTransform(xmlInput, xslFile, xmlOutResult);
916    if(!xmlOutResult.getDocument().hasRootElement())
917    { throw new XmlException("xslTransformationXml: no root element produced");
918    }
919    //if success than return the detached root element from conversion document.
920    //The document is further unnecessary and will be deleted by the garbage collector.
921    Element xmlOut = xmlOutResult.getDocument().getRootElement();
922    xmlOut.detach();
923    return xmlOut;
924  }
925
926
927
928
929  /**Transform a xml tree to a string. The input tree is started from an detached xml Element.
930     @return the string.
931  */
932  public static String xslTransformString(Element xmlInput, File xslFile)
933  throws XmlException
934  {
935    ByteArrayOutputStream outputStream = new ByteArrayOutputStream(10000);  //increased if necessary
936    Result xmlOutResult = new StreamResult(outputStream);
937
938    xslTransform(xmlInput, xslFile, xmlOutResult);
939    return outputStream.toString();
940  }
941
942
943  /**Output a xml tree to a file with beautification of the output,
944   * but beware all spaces inside textual content. It calls internally {@link beautificationNoTextContent(Element)}
945   * <br>
946   * @param xmlRoot The root element
947   * @param fileOut The file to write out. The file will be created or replaced.
948   * @throws FileNotFoundException If the fileOut doesn't match.
949  */
950  public static void writeXmlBeautificatedTextFile(Element xmlRoot, File fileOut, Charset encoding)
951  throws XmlException, FileNotFoundException
952  { Element xmlRootBeautificated = beautificationBewareTextContent(xmlRoot);
953    XmlMode mode = new XmlMode(); 
954    mode.setEncoding(encoding);
955    mode.setIndent(null);
956    writeXmlFile(xmlRootBeautificated, fileOut, mode);
957  }
958
959
960
961  /**Output a xml tree to a file.
962   * @throws FileNotFoundException if the sFileOut doesn't match
963   * @deprecated
964  */
965  public static void writeXmlFile(Element xmlRoot, String sFileOut)
966  throws XmlException, FileNotFoundException
967  { XmlMode mode = new XmlMode();
968        writeXmlFile(xmlRoot, new File(sFileOut), mode);
969  }
970
971
972
973
974
975
976
977  
978  
979  
980  
981  
982  
983
984
985  /**Output a xml tree to a file.
986   * if setIndent() is called with null-Argument, than write without wrapping.
987   *  Otherwise, wrap with the given indent.
988   *  If the output is written with indent, it is revitalize readable. Some times, the text should be written
989   *  with exactly spaces, not with white spaces, from there setIndent(null) should be called before.
990   *  The output will be written with the charset defined by {@link create(Charset)} or {@link setEncoding(int)}.
991   *  @param xmlRoot The root Element of the created output XML-File.
992   *  @param fileOut This file will be created or replaced.
993   * @exception  FileNotFoundException  if the file exists but is a directory
994   *                   rather than a regular file, does not exist but cannot
995   *                   be created, or cannot be opened for any other reason
996   * @throws IOException if any error on writing at file system
997   * @exception  SecurityException  if a security manager exists and its
998   *               <code>checkWrite</code> method denies write access
999   *               to the file.
1000  */
1001  public void writeXmlFile(Element xmlRoot, File fileOut)
1002  throws XmlException, FileNotFoundException
1003  { FileOutputStream fOut = null;
1004    //try
1005    { fOut = new FileOutputStream(fileOut);
1006      Document docu = new Document();
1007      docu.setRootElement(xmlRoot);
1008      XMLOutputter writerXml = new XMLOutputter();
1009      org.jdom.output.Format format = org.jdom.output.Format.getRawFormat();
1010      //format.setNewlines(bIndent);
1011      //xmlFormat.setIndent("  ");
1012      //format.setIndent("  ");
1013      //format.setLineSeparator("\n");
1014      //format.setExpandEmptyElements(true);
1015      writerXml.setFormat(format);
1016      try
1017      { writerXml.output( docu,  fOut );
1018        fOut.close();
1019      }
1020      catch(IOException exc)
1021      { throw new XmlException("Any error writing file:" + exc.getMessage());
1022      }
1023    }
1024    //catch(IOException exception){ throw new XmlException("write xml-output-file: " + fileOut.getAbsolutePath()); }
1025  }
1026
1027
1028
1029  /**Output a xml tree to a file.
1030   *  @param xmlRoot The root Element of the created output XML-File.
1031   *  @param fileOut This file will be created or replaced.
1032   *  @param sEncoding The encoding, typicall "ISO-8859-1"
1033   * @exception  FileNotFoundException  if the file exists but is a directory
1034   *                   rather than a regular file, does not exist but cannot
1035   *                   be created, or cannot be opened for any other reason
1036   * @throws IOException if any error on writing at file system
1037   * @exception  SecurityException  if a security manager exists and its
1038   *               <code>checkWrite</code> method denies write access
1039   *               to the file.
1040  */
1041  public static void writeXmlFile(Element xmlRoot, File fileOut, XmlMode mode)
1042  throws XmlException, FileNotFoundException
1043  { FileOutputStream fOut = null;
1044    //try
1045    { fOut = new FileOutputStream(fileOut);
1046      Document docu = new Document();
1047      docu.setRootElement(xmlRoot);
1048      XMLOutputter writerXml = new XMLOutputter();
1049      org.jdom.output.Format format = org.jdom.output.Format.getRawFormat();
1050      format.setEncoding(mode.getEncoding());
1051      //format.setExpandEmptyElements(true);
1052      writerXml.setFormat(format);
1053      try
1054      { writerXml.output( docu,  fOut );
1055        fOut.close();
1056      }
1057      catch(IOException exc)
1058      { throw new XmlException("Any error writing file:" + exc.getMessage());
1059      }
1060    }
1061    //catch(IOException exception){ throw new XmlException("write xml-output-file: " + fileOut.getAbsolutePath()); }
1062  }
1063
1064
1065
1066
1067  /**Output a xml tree to a file.
1068   *  @param xmlRoot The root Element of the created output XML-File.
1069   *  @param fileOut This file will be created or replaced.
1070   *  @param sEncoding The encoding, typicall "ISO-8859-1"
1071   * @exception  FileNotFoundException  if the file exists but is a directory
1072   *                   rather than a regular file, does not exist but cannot
1073   *                   be created, or cannot be opened for any other reason
1074   * @throws IOException if any error on writing at file system
1075   * @exception  SecurityException  if a security manager exists and its
1076   *               <code>checkWrite</code> method denies write access
1077   *               to the file.
1078  */
1079  public static void writeXmlDirect(Element xml, File fileOut, String sEncoding)
1080  throws XmlException, FileNotFoundException
1081  { FileWriter out = null;
1082    //try
1083    { try
1084                        {
1085                                out = new FileWriter(fileOut);
1086        writeXmlDirect(xml, out, sEncoding);
1087                        } catch (IOException e)
1088                        {
1089                                // TODO Auto-generated catch block
1090                                e.printStackTrace();
1091                        }
1092    }
1093  }  
1094  /**Output a xml tree to a file.
1095   *  @param xmlRoot The root Element of the created output XML-File.
1096   *  @param fileOut This file will be created or replaced.
1097   *  @param sEncoding The encoding, typicall "ISO-8859-1"
1098   * @exception  FileNotFoundException  if the file exists but is a directory
1099   *                   rather than a regular file, does not exist but cannot
1100   *                   be created, or cannot be opened for any other reason
1101   * @throws IOException if any error on writing at file system
1102   * @exception  SecurityException  if a security manager exists and its
1103   *               <code>checkWrite</code> method denies write access
1104   *               to the file.
1105  */
1106  @SuppressWarnings("unchecked")
1107  public static void writeXmlDirect(Element xml, Writer out, String sEncoding)
1108  throws XmlException, IOException
1109  { //try
1110    out.write("<" + xml.getName());
1111        { //copy Attributes
1112      { List attributes = xml.getAttributes();
1113        Iterator iter = attributes.iterator();
1114        while(iter.hasNext())
1115        { Attribute attrib = ((Attribute)(iter.next()));
1116          //xmlOut.setAttribute(attrib.getName(), attrib.getValue());
1117          out.write(" " + attrib.getName() + "=" + "\"" + attrib.getValue() + "\"");
1118        }
1119      }
1120    }
1121    out.write(">");
1122        
1123    { //copy AdditionalNamespaces
1124      List listAddNs = xml.getAdditionalNamespaces();
1125      if(listAddNs.size() >0)
1126      { Iterator iter = listAddNs.iterator();
1127        while(iter.hasNext())
1128        { Namespace ns = (Namespace)(iter.next());
1129          out.write(" xmlns:" + ns.getPrefix() + "=\"" + ns.getURI() + "\"");           
1130        }
1131      }
1132    }
1133
1134    { List listChild = xml.getContent();
1135      //List listChildNew = new LinkedList();
1136      Iterator iterChild = listChild.iterator();
1137      while(iterChild.hasNext())
1138      { Content xmlChild = (Content)iterChild.next();
1139        //xmlChild.detach();  //:NOTE: detach causes a Iterator exception!
1140        if(xmlChild instanceof org.jdom.Text) // && bContendText)
1141        { org.jdom.Text text = ((org.jdom.Text)(xmlChild));
1142          String sContent = text.getText();
1143          //if(sContent.indexOf('<')>0 || )
1144          //if(sContent)
1145          out.write(sContent);
1146        }
1147        else if(xmlChild instanceof Element)
1148        { //other element:
1149          //call recursively this method, after it add the result element.
1150          writeXmlDirect((Element)xmlChild, out, sEncoding);
1151        }
1152
1153      }
1154
1155    }
1156  }
1157
1158
1159
1160
1161
1162}
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176