model
DefaultMutableTreeNode root = (DefaultMutableTreeNode)treeModel.getRoot(); Enumeration etree = root.breadthFirstEnumeration(); while(etree.hasMoreElements()) {
UrlTreeNode
node
(UrlTreeNode)(((DefaultMutableTreeNode)etree.nextElement()).getUserObject());
if(node instanceof UrlTreeNode && node.equals(teststring)) return true; }
return false; } /**
* Check depth of search
* @return true if depth limit exceeded
* @param node search tree node to test the depth limit of */
public boolean depthLimitExceeded(DefaultMutableTreeNode node) {
    // The limit counts as exceeded as soon as the node's level reaches depthLimit.
    return node.getLevel() >= depthLimit;
} /**
* Adds a node to the search tree as the last child of the given parent, then
* expands the path to the parent.
*
* @param parentnode parent to add the new node under
* @param newnode node to be added to the tree
* @return the tree node that was created to hold {@code newnode}
*/
private DefaultMutableTreeNode addNode(DefaultMutableTreeNode parentnode, UrlTreeNode newnode) {
    DefaultMutableTreeNode node = new DefaultMutableTreeNode(newnode);
    DefaultTreeModel treeModel = (DefaultTreeModel) searchTree.getModel(); // get our model
    int index = treeModel.getChildCount(parentnode); // how many children are there already?
    treeModel.insertNodeInto(node, parentnode, index); // add as last child
    TreePath tp = new TreePath(parentnode.getPath());
    searchTree.expandPath(tp); // make sure the user can see the node just added
    return node;
}

/**
 * determines if the given url is in one of the top level domains in the domain
 * search list
 *
 * @param url url to be checked
 * @return true if its ok, else false if url should be skipped
 */
private boolean isDomainOk(URL url) {
if(url.getProtocol().equals(\ return true; // file protocol always ok
String host = url.getHost(); int lastdot = host.lastIndexOf(\ if(lastdot <= 0) return true;
String domain = host.substring(lastdot); // just the .com or .edu part
if(ipDomainList.length == 0) return true;
for(int i=0; i < ipDomainList.length; i++)
22
{
if(ipDomainList[i].equalsIgnoreCase(\ return true;
if(ipDomainList[i].equalsIgnoreCase(domain)) return true; }
return false; } /**
* upate statistics label */
// NOTE(review): the setText(...) argument below was damaged when this listing was
// extracted: the string literals lost their quotes and whatever was concatenated
// with them is missing, leaving only the "Sites found :" fragment. The full
// expression cannot be recovered from this text; restore it from the original source.
private void updateStats() {
statsLabel.setText(\ Sites found : \ } /**
* Repairs a sloppy href: flips backwards slashes and adds a missing trailing /.
* @param href web site reference
* @return repaired web page reference */
public static String fixHref(String href) {
String newhref = href.replace('\\\\', '/'); // fix sloppy web references int lastdot = newhref.lastIndexOf('.'); int lastslash = newhref.lastIndexOf('/'); if(lastslash > lastdot) {
if(newhref.charAt(newhref.length()-1) != '/') newhref = newhref+\ // add on missing / }
return newhref;
23
} /**
* recursive routine to search the web
* @param parentnode parentnode in the search tree * @param urlstr web page address to search */
public void searchWeb(DefaultMutableTreeNode parentnode, String urlstr) {
if(urlHasBeenVisited(urlstr)) // have we been here? return; // yes, just return
if(depthLimitExceeded(parentnode)) return;
if(sitesSearched > siteLimit) return;
yield(); // allow the main program to run
if(stopSearch) return;
messageArea.append(\ sitesSearched++; updateStats(); //
// now look in the file // try{
URL url = new URL(urlstr); // create the url object from a string.
String protocol = url.getProtocol(); // ask the url for its protocol
if(!protocol.equalsIgnoreCase(\ {
24
messageArea.append(\ Skipping : \ return; }
String path = url.getPath(); // ask the url for its path int lastdot = path.lastIndexOf(\ if(lastdot > 0) {
String extension = path.substring(lastdot); // just the file extension
if(!extension.equalsIgnoreCase(\ return; // skip everything but html files }
if(!isDomainOk(url)) {
messageArea.append(\ Skipping : \ return; }
UrlTreeNode newnode = new UrlTreeNode(url); // create the node
InputStream in = url.openStream(); // ask the url object to create an input stream InputStreamReader isr = new InputStreamReader(in); // convert the stream to a reader. DefaultMutableTreeNode treenode = addNode(parentnode, newnode);
SpiderParserCallback cb = new SpiderParserCallback(treenode); // create a callback object
ParserDelegator pd = new ParserDelegator(); // create the delegator pd.parse(isr,cb,true); // parse the stream isr.close(); // close the stream } // end try
catch(MalformedURLException ex) {
25