/* snarf, the Simple Non-interactive All-purpose Resource Fetcher
** Copyright (C) 1995 Zachary Beane
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
**
** The author of this program may be reached via email at
** xach@mint.net or via USPS at 17 Talmadge Rd., Waite, ME 04492, USA.
*/
/* url.c */
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include "protocol.h"
#include "url.h"

/*\
url_copy_span copies the first len characters of src into dst, a buffer
of cap bytes, and always NUL-terminates the result. Input longer than
cap - 1 characters is truncated instead of overrunning dst.
\*/

static void url_copy_span(char *dst, int cap, const char *src, int len)
{
  int i;

  if(len > cap - 1)
    len = cap - 1;
  for(i = 0; i < len; i++)
    dst[i] = src[i];
  dst[i] = '\0';
}

/*\
parse_url splits string into the fields of a struct url (declared in
url.h) and returns the structure by value. An url is taken to be
[service]://[host][:port][path][filename], and the fields are filled
in as follows:

  u_service  everything before the first ':'
  u_host     everything after the "://" up to the next '/' or ':'
  u_port     the decimal number following "host:", or the value of
             getport(u_service) when no port digits are present
  u_path     from the end of the host/port through the last '/'
  u_file     whatever follows the last '/'

The caller is expected to pass a string for which isurl() is true; this
is checked with assert(). Every scan also stops at the end of the
string, so a malformed url yields empty fields rather than a read past
the terminator. Text fields are truncated to fit their buffers.
NOTE(review): the truncation relies on sizeof of the u_* members, which
assumes they are char arrays in url.h -- confirm.
\*/

struct url parse_url(char *string)
{
  struct url u;
  int sp = 0;               /* current read position in string */
  int start;                /* index where the current field begins */
  int end;                  /* index of the terminating NUL */
  int last_slash = -1;      /* index of the last '/' after the host/port */
  unsigned int port = 0;
  int ndigits = 0;

  assert(isurl(string));

/* get the service type: everything before the first ':' */
  start = sp;
  while(string[sp] != '\0' && string[sp] != ':')
    sp++;
  url_copy_span(u.u_service, (int) sizeof u.u_service,
                string + start, sp - start);

/* skip the :// separator explicitly; hunting for the next letter would
   eat the leading digits of hosts such as 3com.com or 127.0.0.1 */
  if(string[sp] == ':')
    sp++;
  while(string[sp] == '/')
    sp++;

/* get the hostname */
  start = sp;
  while(string[sp] != '\0' && string[sp] != '/' && string[sp] != ':')
    sp++;
  url_copy_span(u.u_host, (int) sizeof u.u_host,
                string + start, sp - start);

/* if it has a :<port> in the name, accumulate the digits directly; no
   scratch buffer is needed, and unsigned arithmetic cannot overflow
   into undefined behaviour */
  if(string[sp] == ':'){
    sp++;
    while(isdigit((unsigned char) string[sp])){
      port = port * 10 + (unsigned int) (string[sp] - '0');
      ndigits++;
      sp++;
    }
  }

/* with no port digits, use the default port for the service */
  if(ndigits > 0)
    u.u_port = port;
  else
    u.u_port = getport(u.u_service);

/* find the last slash: the path runs through it, the document name
   is everything after it */
  start = sp;
  for(end = sp; string[end] != '\0'; end++)
    if(string[end] == '/')
      last_slash = end;
  if(last_slash >= start)
    sp = last_slash + 1;

/* get the pathname */
  url_copy_span(u.u_path, (int) sizeof u.u_path,
                string + start, sp - start);

/* get the document name */
  url_copy_span(u.u_file, (int) sizeof u.u_file,
                string + sp, end - sp);

  return(u);
}

/*\
isurl is a stand-in for real pattern matching. It walks string once,
left to right, against the pattern "?://?/", where '?' consumes exactly
one character of any kind and every other pattern character is matched
by the next occurrence of that character in string. Characters that do
not match the current literal are simply skipped, so the literals need
not be adjacent: "a:b/c/d/" is accepted just like "http://host/".

Returns 1 when the whole pattern was consumed, 0 otherwise. A null
pointer or an empty string yields 0.
\*/

int isurl(char *string)
{
  static const char match[] = "?://?/";
  const int matchlen = (int) (sizeof match - 1);
  int sp;       /* string pointer */
  int mp = 0;   /* match pointer */

  if(!string)
    return(0);

  /* once the pattern is exhausted the rest of string cannot matter */
  for(sp = 0; string[sp] != '\0' && mp < matchlen; sp++){
    if(match[mp] == '?' || string[sp] == match[mp])
      mp++;
  }

  return(mp == matchlen);
}

/*\
getport maps a service name to its default port. The names recognised
are "http", "ftp", "finger" and "gopher" (compared exactly, so case
matters); the corresponding HTTP, FTP, FINGER or GOPHER constant is
returned. Any other name gives 0.
\*/

int getport(char *string)
{
  int port = 0;

  if(!strcmp(string, "http"))
    port = HTTP;
  else if(!strcmp(string, "ftp"))
    port = FTP;
  else if(!strcmp(string, "finger"))
    port = FINGER;
  else if(!strcmp(string, "gopher"))
    port = GOPHER;

  return(port);
}
