摸一篇xml解析。

以该RSS订阅为例:https://link.springer.com/search.rss?facet-content-type=Article&facet-journal-id=10664&channel-name=Empirical+Software+Engineering

xml解析大概分为三种:DOM解析、SAX解析和pull解析。我先用的DOM解析,发现在安卓里需要的内存太高了,然后就换成了SAX解析,pull还没摸过(摸了再添)。

注意注意:安卓主线程(也就是UI线程)里面是不允许进行网络操作的!网络请求要放在子线程(后台线程)里执行!

DOM解析


DOM解析类似于一棵树,解析的时候相当于把xml里的内容都爬下来然后对树的节点进行取用,因此对内存的要求比较高。

使用方法:用document解析工厂把url的document爬下来,再用相应函数去提取节点的值。

常用函数:

NodeList Document.getElementsByTagName(String s) //按文档顺序返回所有标签名匹配的元素节点列表

Node NodeList.item(int i) //返回列表的第i个节点

NodeList Node.getChildNodes() //返回节点的所有子节点列表

String Node.getTextContent() //获得节点内容

int NodeList.getLength() //返回列表中的节点个数

示例代码:
/**
 * Parses the RSS document at {@code url} with DOM and stores its contents.
 *
 * <p>The first {@code <title>}, {@code <description>} and {@code <link>} elements in
 * document order are used as the channel metadata. A {@link Channel} and a {@link Folder}
 * named after the channel title are inserted, followed by one {@link Item} and one
 * {@link Folder_item} per {@code <item>} element.
 *
 * <p>Failures are logged rather than propagated. If the feed has no usable title,
 * nothing is inserted.
 *
 * @param url location of the RSS document, handed directly to {@code DocumentBuilder.parse}
 */
public void InsertItemFromURL(String url) {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    try {
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document document = db.parse(url);
        NodeList itemList = document.getElementsByTagName("item");

        String folderTitle = firstElementText(document, "title");
        if (folderTitle == null || folderTitle.isEmpty()) {
            // The title doubles as the folder name, so there is nothing sensible to store.
            java.util.logging.Logger.getLogger("InsertItemFromURL")
                    .warning("RSS feed has no channel title, skipping: " + url);
            return;
        }
        // Description and link are optional for storage purposes; fall back to "".
        String folderDesc = firstElementText(document, "description");
        String folderLink = firstElementText(document, "link");

        Channel channel = new Channel();
        channel.setTitle(folderTitle);
        channel.setDescription(folderDesc == null ? "" : folderDesc);
        channel.setLink(folderLink == null ? "" : folderLink);
        InsertChannel(channel);

        Folder folder = new Folder();
        folder.setName(folderTitle);
        InsertFolder(folder);

        for (int i = 0; i < itemList.getLength(); i++) {
            Item item = new Item();
            item.setRead(0);

            String title = "";
            NodeList childNodes = itemList.item(i).getChildNodes();
            for (int k = 0; k < childNodes.getLength(); k++) {
                Node child = childNodes.item(k);
                // Skip whitespace text nodes and comments between the child elements.
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                switch (child.getNodeName()) {
                    case "title":
                        title = child.getTextContent();
                        item.setTitle(title);
                        break;
                    case "description":
                        item.setDescription(child.getTextContent());
                        break;
                    case "link":
                        item.setLink(child.getTextContent());
                        break;
                    case "pubDate":
                        item.setPubDate(child.getTextContent());
                        break;
                    case "guid":
                        item.setGuid(child.getTextContent());
                        break;
                    default:
                        break;
                }
            }
            InsertItem(item);

            // Link the item to the channel's folder by (folder name, item title).
            Folder_item folder_item = new Folder_item();
            folder_item.setFolderName(folderTitle);
            folder_item.setItemName(title);
            InsertFolderItem(folder_item);
        }
    } catch (Exception e) {
        // Best-effort import: keep the caller alive, but never hide the failure.
        java.util.logging.Logger.getLogger("InsertItemFromURL")
                .log(java.util.logging.Level.WARNING, "Failed to import RSS feed from " + url, e);
    }
}

/**
 * Returns the text content of the first element named {@code tagName} in the document,
 * or {@code null} when no such element exists.
 */
private String firstElementText(Document document, String tagName) {
    Node first = document.getElementsByTagName(tagName).item(0);
    return first == null ? null : first.getTextContent();
}

SAX解析


SAX解析就是顺序读取,从document开始慢慢读取到document尾部,所以要的内存就少的多,因为不需要把所有文件存到内存里面再进行取用。

helper继承DefaultHandler类。

需要重写的函数:

startDocument() //解析文档开始时的操作

startElement(String uri, String localName, String qName, Attributes attributes) //读到标签时的操作

characters(char[] ch, int start, int length) //读到标签内容时的操作(注意:同一段文本可能被分成多次回调,需要自己拼接)

endElement(String uri, String localName, String qName) //标签结束时的操作

endDocument() //文档尾的操作

使用方法:用URLConnection获取输入流,使用SAX解析工厂,用解析工厂生成解析器,把输入流和helper类传给解析器。

大致代码如下(bean类的懒得复制了,大概看看就行):
/**
 * Downloads the XML document at {@code link} and runs it through a SAX parser.
 *
 * @param link URL of the document to fetch
 * @return the {@link SaxHelper} that received the parse events
 * @throws Exception if the URL is malformed, the connection fails, or parsing fails
 */
private SaxHelper readXmlForSAX(String link) throws Exception {
    URL url = new URL(link);
    URLConnection connection = url.openConnection();

    SAXParserFactory factory = SAXParserFactory.newInstance();
    // SaxHelper matches elements by localName, which SAX only guarantees to be
    // populated when namespace processing is enabled.
    factory.setNamespaceAware(true);
    SAXParser parser = factory.newSAXParser();

    SaxHelper saxHelper = new SaxHelper();
    // try-with-resources closes the stream even when parse() throws.
    try (InputStream inputStream = connection.getInputStream()) {
        parser.parse(inputStream, saxHelper);
    }
    return saxHelper;
}
SaxHelper类:
package com.example.quiterss;

import android.util.Log;

import com.example.quiterss.bean.Channel;
import com.example.quiterss.bean.Folder;
import com.example.quiterss.bean.Folder_item;
import com.example.quiterss.bean.Item;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.ArrayList;

/**
 * SAX handler that collects an RSS channel, its items, and the folder bookkeeping
 * objects while the document is streamed.
 *
 * <p>Text is buffered per element and stored once the element ends (or a child element
 * starts), because a SAX parser may deliver one text node through several
 * {@link #characters(char[], int, int)} callbacks.
 */
public class SaxHelper extends DefaultHandler {
    private Item item;
    private ArrayList<Item> items;
    private Channel channel;
    private Folder folder;
    private Folder_item folder_item;
    private ArrayList<Folder_item> folder_items;
    /** Name of the element whose text is currently being collected, or null. */
    private String tagName = null;
    private String folder_title = null;
    /** True while inside an {@code <item>}; false means text goes to the channel. */
    private boolean bl = false;
    /** Accumulates the text chunks reported for the current element. */
    private final StringBuilder text = new StringBuilder();

    /** Resets all collected state at the start of a document. */
    @Override
    public void startDocument() throws SAXException {
        this.items = new ArrayList<Item>();
        this.channel = new Channel();
        this.folder = new Folder();
        this.folder_items = new ArrayList<Folder_item>();
        this.text.setLength(0);
        Log.d("----------", "startDocument:init items");
    }

    /** Starts a new item on {@code <item>} and begins collecting text for the element. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // Store any text seen before this child element under the parent's name first.
        flushText();
        String name = elementName(localName, qName);
        if (name.equals("item")) {
            item = new Item();
            folder_item = new Folder_item();
            bl = true;
        } else if (name.equals("channel")) {
            bl = false;
        }
        this.tagName = name;
    }

    /** Buffers element text; it may arrive in several chunks for one text node. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.tagName != null) {
            text.append(ch, start, length);
        }
    }

    /** Stores the buffered text and, on {@code </item>}, records the finished item. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        flushText();
        if (elementName(localName, qName).equals("item")) {
            item.setRead(0);
            this.items.add(item);
            Log.d("aaaaaaaaaaaaaaa", "endElement: 解析了一条数据");
            this.folder_items.add(folder_item);
            item = null;
            folder_item = null;
        }
        // Text that follows a closing tag belongs to no tracked element.
        this.tagName = null;
    }

    @Override
    public void endDocument() throws SAXException {
        bl = false;
        Log.d("----------------", "size:" + folder_items.size());
        super.endDocument();
    }

    public ArrayList<Item> getItems() {
        return items;
    }

    public Channel getChannel() {
        return channel;
    }

    public Folder getFolder() {
        return folder;
    }

    public ArrayList<Folder_item> getFolder_items() {
        return folder_items;
    }

    /** Returns localName, or qName when the parser did not supply a local name. */
    private static String elementName(String localName, String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }

    /** Hands the buffered text, if any, to the current element's field and clears the buffer. */
    private void flushText() {
        if (this.tagName != null && text.length() > 0) {
            storeText(this.tagName, text.toString());
        }
        text.setLength(0);
    }

    /** Routes the text of element {@code name} to the current item or to the channel. */
    private void storeText(String name, String data) {
        if (bl) {
            switch (name) {
                case "title":
                    this.item.setTitle(data);
                    this.folder_item.setFolderName(folder_title);
                    this.folder_item.setItemName(data);
                    Log.d("TAG", "characters: " + data);
                    break;
                case "description":
                    this.item.setDescription(data);
                    break;
                case "link":
                    this.item.setLink(data);
                    break;
                case "pubDate":
                    this.item.setPubDate(data);
                    break;
                case "guid":
                    this.item.setGuid(data);
                    break;
                default:
                    break;
            }
        } else {
            // NOTE(review): every title/description/link seen outside an <item> lands
            // here, including ones nested in other channel children; the last one wins.
            switch (name) {
                case "title":
                    this.channel.setTitle(data);
                    folder_title = data;
                    this.folder.setName(data);
                    break;
                case "description":
                    this.channel.setDescription(data);
                    break;
                case "link":
                    this.channel.setLink(data);
                    break;
                default:
                    break;
            }
        }
    }
}