2013-05-15 44 views
5

Tôi có tệp XML này và muốn chuyển nó thành một khung dữ liệu. Làm thế nào để chuyển đổi tệp XML thành một khung dữ liệu trong R?

data.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> 
- <graph_data xmlns:ns2="http://www.w3.org/2005/Atom"> 
    <graph_property name="calculation_method" value="Geo Mean" /> 
    <graph_property name="graph_type" value="TIME" /> 
- <measurement id="521406"> 
    <alias>site4</alias> 
- <bucket_data> 
- <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="21" /> 
    <perf_data unit="seconds" value="3.102" /> 
    </bucket> 
- <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="3.052" /> 
    </bucket> 
- <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="3.387" /> 
    </bucket> 
- <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="3.338" /> 
    </bucket> 
- <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.149" /> 
    </bucket> 
- <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="3.202" /> 
    </bucket> 
- <bucket id="7" name="2013-MAY-14 01:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="2.883" /> 
    </bucket> 
- <bucket id="8" name="2013-MAY-14 02:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="2.582" /> 
    </bucket> 
- <bucket id="9" name="2013-MAY-14 03:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.769" /> 
    </bucket> 
- <bucket id="10" name="2013-MAY-14 04:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="2.669" /> 
    </bucket> 
- <bucket id="11" name="2013-MAY-14 05:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="2.830" /> 
    </bucket> 
- <bucket id="12" name="2013-MAY-14 06:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="2.591" /> 
    </bucket> 
- <bucket id="13" name="2013-MAY-14 07:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="3.213" /> 
    </bucket> 
- <bucket id="14" name="2013-MAY-14 08:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.653" /> 
    </bucket> 
- <bucket id="15" name="2013-MAY-14 09:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.935" /> 
    </bucket> 
- <bucket id="16" name="2013-MAY-14 10:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="2.495" /> 
    </bucket> 
- <bucket id="17" name="2013-MAY-14 11:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="3.169" /> 
    </bucket> 
- <bucket id="18" name="2013-MAY-15 12:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="2.789" /> 
    </bucket> 
- <bucket id="19" name="2013-MAY-15 01:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="3.245" /> 
    </bucket> 
- <bucket id="20" name="2013-MAY-15 02:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="3.281" /> 
    </bucket> 
- <bucket id="21" name="2013-MAY-15 03:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="3.773" /> 
    </bucket> 
- <bucket id="22" name="2013-MAY-15 04:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="21" /> 
    <perf_data unit="seconds" value="2.648" /> 
    </bucket> 
- <bucket id="23" name="2013-MAY-15 05:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="3.291" /> 
    </bucket> 
- <bucket id="24" name="2013-MAY-15 06:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="3.084" /> 
    </bucket> 
    </bucket_data> 
- <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="2.949" /> 
    <data_cell name="avg_avail" unit="percent" value="100.00" /> 
    <data_cell name="total_datapoint_count" unit="#" value="347" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
- <measurement id="521406"> 
    <alias>Site3</alias> 
- <bucket_data> 
- <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="85.71" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="6.503" /> 
    </bucket> 
- <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="6.330" /> 
    </bucket> 
- <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="7.242" /> 
    </bucket> 
- <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="93.33" /> 
    <data_count unit="#" value="14" /> 
    <perf_data unit="seconds" value="7.083" /> 
    </bucket> 
- <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="7.087" /> 
    </bucket> 
- <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="76.92" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="6.197" /> 
    </bucket> 
- <bucket id="7" name="2013-MAY-14 01:00 PM"> 
    <avail_data unit="percent" value="83.33" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="6.772" /> 
    </bucket> 
- <bucket id="8" name="2013-MAY-14 02:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="5.832" /> 
    </bucket> 
- <bucket id="9" name="2013-MAY-14 03:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="8.513" /> 
    </bucket> 
- <bucket id="10" name="2013-MAY-14 04:00 PM"> 
    <avail_data unit="percent" value="91.67" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="7.190" /> 
    </bucket> 
- <bucket id="11" name="2013-MAY-14 05:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="6.373" /> 
    </bucket> 
- <bucket id="12" name="2013-MAY-14 06:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="8.440" /> 
    </bucket> 
- <bucket id="13" name="2013-MAY-14 07:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="6.318" /> 
    </bucket> 
- <bucket id="14" name="2013-MAY-14 08:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="6.374" /> 
    </bucket> 
- <bucket id="15" name="2013-MAY-14 09:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="6.773" /> 
    </bucket> 
- <bucket id="16" name="2013-MAY-14 10:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="6.274" /> 
    </bucket> 
- <bucket id="17" name="2013-MAY-14 11:00 PM"> 
    <avail_data unit="percent" value="90.00" /> 
    <data_count unit="#" value="9" /> 
    <perf_data unit="seconds" value="5.881" /> 
    </bucket> 
- <bucket id="18" name="2013-MAY-15 12:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="5.630" /> 
    </bucket> 
- <bucket id="19" name="2013-MAY-15 01:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="6.585" /> 
    </bucket> 
- <bucket id="20" name="2013-MAY-15 02:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="7.394" /> 
    </bucket> 
- <bucket id="21" name="2013-MAY-15 03:00 AM"> 
    <avail_data unit="percent" value="91.67" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="6.427" /> 
    </bucket> 
- <bucket id="22" name="2013-MAY-15 04:00 AM"> 
    <avail_data unit="percent" value="95.24" /> 
    <data_count unit="#" value="20" /> 
    <perf_data unit="seconds" value="7.140" /> 
    </bucket> 
- <bucket id="23" name="2013-MAY-15 05:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="7.152" /> 
    </bucket> 
- <bucket id="24" name="2013-MAY-15 06:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="6.474" /> 
    </bucket> 
    </bucket_data> 
- <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="6.729" /> 
    <data_cell name="avg_avail" unit="percent" value="95.97" /> 
    <data_cell name="total_datapoint_count" unit="#" value="347" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
- <measurement id="521406"> 
    <alias>Site2</alias> 
- <bucket_data> 
- <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="2.247" /> 
    </bucket> 
- <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="2.382" /> 
    </bucket> 
- <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.232" /> 
    </bucket> 
- <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="14" /> 
    <perf_data unit="seconds" value="2.223" /> 
    </bucket> 
- <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.265" /> 
    </bucket> 
- <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.130" /> 
    </bucket> 
- <bucket id="7" name="2013-MAY-14 01:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.153" /> 
    </bucket> 
- <bucket id="8" name="2013-MAY-14 02:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="2.005" /> 
    </bucket> 
- <bucket id="9" name="2013-MAY-14 03:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.322" /> 
    </bucket> 
- <bucket id="10" name="2013-MAY-14 04:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="1.918" /> 
    </bucket> 
- <bucket id="11" name="2013-MAY-14 05:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="1.992" /> 
    </bucket> 
- <bucket id="12" name="2013-MAY-14 06:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="2.423" /> 
    </bucket> 
- <bucket id="13" name="2013-MAY-14 07:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="2.327" /> 
    </bucket> 
- <bucket id="14" name="2013-MAY-14 08:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.605" /> 
    </bucket> 
- <bucket id="15" name="2013-MAY-14 09:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="2.533" /> 
    </bucket> 
- <bucket id="16" name="2013-MAY-14 10:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="2.077" /> 
    </bucket> 
- <bucket id="17" name="2013-MAY-14 11:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="9" /> 
    <perf_data unit="seconds" value="2.356" /> 
    </bucket> 
- <bucket id="18" name="2013-MAY-15 12:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="2.506" /> 
    </bucket> 
- <bucket id="19" name="2013-MAY-15 01:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="2.422" /> 
    </bucket> 
- <bucket id="20" name="2013-MAY-15 02:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="2.220" /> 
    </bucket> 
- <bucket id="21" name="2013-MAY-15 03:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="2.669" /> 
    </bucket> 
- <bucket id="22" name="2013-MAY-15 04:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="20" /> 
    <perf_data unit="seconds" value="2.274" /> 
    </bucket> 
- <bucket id="23" name="2013-MAY-15 05:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="2.277" /> 
    </bucket> 
- <bucket id="24" name="2013-MAY-15 06:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="2.180" /> 
    </bucket> 
    </bucket_data> 
- <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="2.269" /> 
    <data_cell name="avg_avail" unit="percent" value="100.00" /> 
    <data_cell name="total_datapoint_count" unit="#" value="333" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
- <measurement id="521406"> 
    <alias>Site1</alias> 
- <bucket_data> 
- <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="1.431" /> 
    </bucket> 
- <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="1.559" /> 
    </bucket> 
- <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="1.378" /> 
    </bucket> 
- <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="14" /> 
    <perf_data unit="seconds" value="1.307" /> 
    </bucket> 
- <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="1.458" /> 
    </bucket> 
- <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="1.345" /> 
    </bucket> 
- <bucket id="7" name="2013-MAY-14 01:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="1.317" /> 
    </bucket> 
- <bucket id="8" name="2013-MAY-14 02:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="1.465" /> 
    </bucket> 
- <bucket id="9" name="2013-MAY-14 03:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="1.398" /> 
    </bucket> 
- <bucket id="10" name="2013-MAY-14 04:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="1.509" /> 
    </bucket> 
- <bucket id="11" name="2013-MAY-14 05:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="1.284" /> 
    </bucket> 
- <bucket id="12" name="2013-MAY-14 06:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="1.759" /> 
    </bucket> 
- <bucket id="13" name="2013-MAY-14 07:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="1.434" /> 
    </bucket> 
- <bucket id="14" name="2013-MAY-14 08:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="1.402" /> 
    </bucket> 
- <bucket id="15" name="2013-MAY-14 09:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="1.452" /> 
    </bucket> 
- <bucket id="16" name="2013-MAY-14 10:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="1.216" /> 
    </bucket> 
- <bucket id="17" name="2013-MAY-14 11:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="9" /> 
    <perf_data unit="seconds" value="1.381" /> 
    </bucket> 
- <bucket id="18" name="2013-MAY-15 12:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="1.236" /> 
    </bucket> 
- <bucket id="19" name="2013-MAY-15 01:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="16" /> 
    <perf_data unit="seconds" value="1.327" /> 
    </bucket> 
- <bucket id="20" name="2013-MAY-15 02:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="17" /> 
    <perf_data unit="seconds" value="1.465" /> 
    </bucket> 
- <bucket id="21" name="2013-MAY-15 03:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="1.529" /> 
    </bucket> 
- <bucket id="22" name="2013-MAY-15 04:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="20" /> 
    <perf_data unit="seconds" value="1.354" /> 
    </bucket> 
- <bucket id="23" name="2013-MAY-15 05:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="11" /> 
    <perf_data unit="seconds" value="1.372" /> 
    </bucket> 
- <bucket id="24" name="2013-MAY-15 06:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="12" /> 
    <perf_data unit="seconds" value="1.219" /> 
    </bucket> 
    </bucket_data> 
- <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="1.387" /> 
    <data_cell name="avg_avail" unit="percent" value="100.00" /> 
    <data_cell name="total_datapoint_count" unit="#" value="333" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
    <ns2:link href="www.example.com" rel="slotmetadata" type="application/xml" /> 
    </graph_data> 

Khung dữ liệu của tôi cần có các cột như sau:

alias bucket_name avail_data perf_data 

Tôi đã thử như sau:

# Parse data.xml into an XML document object.
doc1 = xmlParse("data.xml") 
# The XPath "//alias" matches only the <alias> elements, so the data frame
# built from those nodes contains nothing but the alias values.
df<-xmlToDataFrame(nodes = getNodeSet(doc1, "//alias")) 

Tôi chỉ nhận được các giá trị alias trong một khung dữ liệu có một cột. Có ai biết tôi còn thiếu gì ở đây không?

có tệp

+0

Bạn có thể làm cho tệp của mình có thể tải xuống hoặc đăng đoạn mã của mình mà tôi có thể sao chép để nhập vào bảng điều khiển r không? – SchaunW

+0

Đây có phải là XML hợp lệ không? 'xmlParse' không thành công khi tôi thử nó với mã mẫu. – Thomas

+0

@SchaunW Tôi đã đặt toàn bộ tệp trong bài đăng gốc. – user1471980

Trả lời

2

Dường như XML của bạn có một số vấn đề. Tôi chỉ có thể làm cho nó chạy được bằng cách xóa các dòng sau:

Line 58: <measurement id="521406"> 
Line 107: <measurement id="521406"> 

Vì vậy:

xml_file <- '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> 
    <graph_data xmlns:ns2="http://www.w3.org/2005/Atom"> 
    <graph_property name="calculation_method" value="Geo Mean" /> 
    <graph_property name="graph_type" value="TIME" /> <measurement id="521406"> 
    <alias>example1.com</alias> 
    <bucket_data> 
    <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="21" /> 
    <perf_data unit="seconds" value="3.102" /> 
    </bucket> 
    <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="3.052" /> 
    </bucket> 
    <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="3.387" /> 
    </bucket> 
    <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="3.338" /> 
    </bucket> 
    <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="2.149" /> 
    </bucket> 
    <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="3.202" /> 
    </bucket> 
    <bucket id="7" name="2013-MAY-14 01:00 PM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="2.883" /> 
    </bucket> 
    </bucket_data> 
    <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="2.949" /> 
    <data_cell name="avg_avail" unit="percent" value="100.00" /> 
    <data_cell name="total_datapoint_count" unit="#" value="347" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
    <measurement id="521406"> 
    <alias>example2.com</alias> 
    <bucket_data> 
    <bucket id="1" name="2013-MAY-14 07:00 AM"> 
    <avail_data unit="percent" value="85.71" /> 
    <data_count unit="#" value="18" /> 
    <perf_data unit="seconds" value="6.503" /> 
    </bucket> 
    <bucket id="2" name="2013-MAY-14 08:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="13" /> 
    <perf_data unit="seconds" value="6.330" /> 
    </bucket> 
    <bucket id="3" name="2013-MAY-14 09:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="15" /> 
    <perf_data unit="seconds" value="7.242" /> 
    </bucket> 
    <bucket id="4" name="2013-MAY-14 10:00 AM"> 
    <avail_data unit="percent" value="93.33" /> 
    <data_count unit="#" value="14" /> 
    <perf_data unit="seconds" value="7.083" /> 
    </bucket> 
    <bucket id="5" name="2013-MAY-14 11:00 AM"> 
    <avail_data unit="percent" value="100.00" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="7.087" /> 
    </bucket> 
    <bucket id="6" name="2013-MAY-14 12:00 PM"> 
    <avail_data unit="percent" value="76.92" /> 
    <data_count unit="#" value="10" /> 
    <perf_data unit="seconds" value="6.197" /> 
    </bucket> 
    </bucket_data> 
    <graph_option> 
    <data_cell name="perfwarning" unit="seconds" value="-" /> 
    <data_cell name="perfcritical" unit="seconds" value="-" /> 
    <data_cell name="availwarning" unit="percent" value="-" /> 
    <data_cell name="availcritical" unit="percent" value="-" /> 
    <data_cell name="bucketsize" unit="seconds" value="3600" /> 
    <data_cell name="rows" unit="#" value="24" /> 
    <data_cell name="pagecomponent" unit="seconds" value="Total Time" /> 
    <data_cell name="avg_perf" unit="seconds" value="6.729" /> 
    <data_cell name="avg_avail" unit="percent" value="95.97" /> 
    <data_cell name="total_datapoint_count" unit="#" value="347" /> 
    <data_cell /> 
    </graph_option> 
    </measurement> 
    </graph_data>' 

# xmlParse() and xmlToList() come from the XML package.
library(XML)

# asText = TRUE states explicitly that xml_file holds the XML text itself
# rather than a file path or URL, so the parser does not have to guess.
# xmlToList() then converts the parsed document into a nested R list.
xml_file <- xmlToList(xmlParse(xml_file, asText = TRUE))

Tôi chuyển nó thành một danh sách thay vì một khung dữ liệu vì XML dường như không có cấu trúc dễ chuyển thành hàng và cột. Sau đó, dựa trên câu hỏi của bạn, tôi chỉ lấy ra thông tin nằm trong phần "alias" hoặc "bucket_data" của các nút "measurement":

# Keep only the top-level "measurement" entries and, inside each of them,
# only the children whose name contains "alias" or "bucket".
xml_file <- lapply(
    xml_file[names(xml_file) == "measurement"],
    function(node) node[grepl("alias|bucket", names(node))]
)

Sau đó, tôi duyệt qua từng nút measurement, tách riêng thông tin alias, chuyển danh sách các bucket thành các vectơ có tên, rồi ghép alias và các bucket lại với nhau theo cột. Cuối cùng, tôi ghép các nút measurement lại theo hàng và chuyển toàn bộ dữ liệu thành một khung dữ liệu.

# Turn each measurement into a character matrix with one row per bucket and
# the measurement's alias repeated in the first column.
xml_file <- lapply(xml_file, function(x) {
    # Flatten each bucket to a named vector and stack the vectors as rows.
    # rbind() over lapply() gives one row per bucket directly, whereas the
    # return type of sapply() depends on the shape of its input.
    buckets <- do.call("rbind", lapply(x[["bucket_data"]], unlist))
    # [[ ]] matches the element name exactly; $ would allow partial matches.
    cbind("alias" = x[["alias"]], buckets)
})

# Stack the per-measurement matrices into a single matrix.
xml_file <- do.call("rbind", xml_file)

# Every row of the stacked matrix is named "bucket". Drop those names so that
# data.frame() numbers the rows itself; otherwise it warns
# "some row.names duplicated: ... --> row.names NOT used".
rownames(xml_file) <- NULL

xml_file <- data.frame(xml_file, stringsAsFactors = FALSE)

str(xml_file) 
'data.frame': 13 obs. of 9 variables: 
$ alias   : chr "example1.com" "example1.com" "example1.com" "example1.com" ... 
$ avail_data.unit : chr "percent" "percent" "percent" "percent" ... 
$ avail_data.value: chr "100.00" "100.00" "100.00" "100.00" ... 
$ data_count.unit : chr "#" "#" "#" "#" ... 
$ data_count.value: chr "21" "13" "15" "15" ... 
$ perf_data.unit : chr "seconds" "seconds" "seconds" "seconds" ... 
$ perf_data.value : chr "3.102" "3.052" "3.387" "3.338" ... 
$ .attrs.id  : chr "1" "2" "3" "4" ... 
$ .attrs.name  : chr "2013-MAY-14 07:00 AM" "2013-MAY-14 08:00 AM" "2013-MAY-14 09:00 AM" "2013-MAY-14 10:00 AM" ... 

Bạn vẫn cần dọn lại tên cột và chuyển các cột sang kiểu dữ liệu thích hợp, nhưng cách này sẽ đưa được dữ liệu của bạn vào một khung dữ liệu.

+0

@SchaunW, cảm ơn bạn rất nhiều. – user1471980

+0

Rất vui khi được trợ giúp. Hãy xem xét chấp nhận câu trả lời này nếu nó cho bạn những gì bạn đang tìm kiếm. – SchaunW

Các vấn đề liên quan