05 August 2008
BFS
Time complexity: O((V + E) * L), where V is the number of urls, E is the number of links between them, and L is the maximum length of a url (each url must be hashed and stored in the visited set).
/**
 * // This is the HtmlParser's API interface.
 * // You should not implement it, or speculate about its implementation
 * interface HtmlParser {
 *     public List<String> getUrls(String url) {}
 * }
 */
class Solution {
    public List<String> crawl(String startUrl, HtmlParser htmlParser) {
        Set<String> visited = new HashSet<>();
        Queue<String> queue = new LinkedList<>();
        String hostname = getHostname(startUrl);
        queue.offer(startUrl);
        visited.add(startUrl);
        while (!queue.isEmpty()) {
            String currentUrl = queue.poll();
            for (String url : htmlParser.getUrls(currentUrl)) {
                // Only follow links on the same hostname that haven't been seen yet.
                // (A plain url.contains(hostname) check would also accept urls that merely
                // embed the hostname in their path, so compare hostnames exactly.)
                if (getHostname(url).equals(hostname) && visited.add(url)) {
                    queue.offer(url);
                }
            }
        }
        return new ArrayList<>(visited);
    }

    // "http://news.yahoo.com/news".split("/") -> ["http:", "", "news.yahoo.com", "news"],
    // so index 2 is the hostname.
    private String getHostname(String url) {
        return url.split("/")[2];
    }
}
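
On LeetCode the HtmlParser implementation is supplied by the judge, so the snippet above is not runnable on its own. Below is a rough local test harness, assuming a hypothetical map-backed stub (the FakeHtmlParser class and the sample links are made up here, not part of the problem); only the getUrls signature comes from the problem statement. Compile it together with the Solution class above plus an import java.util.*; line.

import java.util.*;

// Hypothetical stand-in for the judge's HtmlParser.
interface HtmlParser {
    List<String> getUrls(String url);
}

// Map-backed stub: each url maps to the list of links found on that page.
class FakeHtmlParser implements HtmlParser {
    private final Map<String, List<String>> links;

    FakeHtmlParser(Map<String, List<String>> links) {
        this.links = links;
    }

    public List<String> getUrls(String url) {
        return links.getOrDefault(url, Collections.emptyList());
    }
}

class CrawlerDemo {
    public static void main(String[] args) {
        Map<String, List<String>> links = new HashMap<>();
        links.put("http://news.yahoo.com", Arrays.asList(
                "http://news.yahoo.com/news",
                "http://news.google.com"));                 // different hostname, skipped
        links.put("http://news.yahoo.com/news", Arrays.asList(
                "http://news.yahoo.com/news/topics/",
                "http://news.yahoo.com"));                  // already visited, skipped
        HtmlParser parser = new FakeHtmlParser(links);
        System.out.println(new Solution().crawl("http://news.yahoo.com", parser));
        // Prints the three yahoo urls in some order.
    }
}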
DFS (generally not used for a real crawler: the recursion depth grows with the longest chain of links, which risks a stack overflow)
class Solution {
    public List<String> crawl(String startUrl, HtmlParser htmlParser) {
        Set<String> visited = new HashSet<>(Arrays.asList(startUrl));
        String hostname = startUrl.split("/")[2];
        dfs(visited, htmlParser, hostname, startUrl);
        return new ArrayList<>(visited);
    }

    private void dfs(Set<String> visited, HtmlParser hp, String hostname, String currentUrl) {
        for (String url : hp.getUrls(currentUrl)) {
            // Recurse only into same-hostname urls that haven't been visited yet.
            if (url.split("/")[2].equals(hostname) && visited.add(url)) {
                dfs(visited, hp, hostname, url);
            }
        }
    }
}
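
If you want depth-first order without the recursion-depth risk, one sketch is to reuse the BFS code but swap the queue for a Deque used as a stack; everything else (hostname check, visited set) stays the same.

class Solution {
    public List<String> crawl(String startUrl, HtmlParser htmlParser) {
        String hostname = startUrl.split("/")[2];
        Set<String> visited = new HashSet<>();
        Deque<String> stack = new ArrayDeque<>();
        stack.push(startUrl);
        visited.add(startUrl);
        while (!stack.isEmpty()) {
            String currentUrl = stack.pop();
            for (String url : htmlParser.getUrls(currentUrl)) {
                if (url.split("/")[2].equals(hostname) && visited.add(url)) {
                    stack.push(url);
                }
            }
        }
        return new ArrayList<>(visited);
    }
}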