/*  VER 200   TAB P   $Id: getarticle.c,v 1.12 1998/09/21 10:04:29 src Exp $
 *
 *  fetch articles via an NNTP server
 *
 *  all NNTP requests are placed in a request queue.
 *  when the response arrives, it is taken care of by a 
 *  function called handle_xxx()
 *
 *  copyright 1996, 1997, 1998 Egil Kvaleberg, egil@kvaleberg.no
 *  the GNU General Public License applies
 *
 *  $Log: getarticle.c,v $
 *  Revision 1.12  1998/09/21 10:04:29  src
 *  Added new command line options for --inews
 *
 *  Revision 1.11  1998/09/11 09:17:42  src
 *  Check path consistency (--no-path) and length (--max-path)
 *  GNU style option --help, --version, --dry-run, changed --noxx to --no-xx
 *  Check for putenv and setenv, added xstrcpy
 *
 *  Revision 1.10  1998/09/09 07:32:11  src
 *  Version 1.1
 *
 *  Revision 1.9  1998/09/03 02:49:29  src
 *  Fixed stuff detected by -Wall
 *
 *  Revision 1.8  1998/07/12 09:39:28  src
 *  newsx version 1.0
 */

#include "common.h"
#include "proto.h"
#include "options.h"
#include "statistics.h"
#include "nntp.h"

long bytes_in_spool; /* external: running total of article bytes successfully passed to write_incoming() */
long latest_where;   /* external: number of the last article dealt with (fetched, skipped or gone) */
long latest_next;    /* article number in the latest NEXT response, -1 if none */
int next_at_witts_end; /* set when a NEXT response says there is nothing more to step to */
int stat_failures;   /* STAT failures in a row; cleared by a successful STAT */

static int next_requests;   /* how many NEXT requests are still unanswered */
static int latest_contains_msgnum; /* latest queued request named an article number (STAT/ARTICLE), as opposed to NEXT */

/*
 *  buffer for the article currently being read.
 *  read_article() grows it in TEMP_STEP increments and reuses it
 *  for every article; nothing in this part of the file frees it
 *  BUG: this can be a real memory hog
 *  BUG: perhaps we should have had a maxlimit here...
 */
char *temp_store = 0;
long temp_len = 0;   /* number of bytes currently allocated for temp_store */
#define TEMP_STEP 100000 /* BUG: tuneable? */

static char cur_group[80]; /* name given to select_group(), for titles and error reporting */
static int no_stat = 0; /* set once the server has answered STAT with ERR_COMMAND */

/*
 *  read one article (header and body) from the server
 *  and feed it to the spool via write_incoming()
 *
 *  the text ends with a line holding a single '.'. every line end
 *  is stored as a plain '\n' in temp_store. while still in the
 *  header, a "Path:" line and its continuation lines are checked
 *  with traverse_path()
 *
 *  where: article number, used for error reports only
 *
 *  return false on errors that mean we should not continue:
 *  server timeout, out of memory, a missing or suspect Path
 *  (unless nopath_opt is set), or write_incoming() failing
 */
static int 
read_article(long where)
{
    char line[NNTP_STRLEN+1];
    char path_copy[NNTP_STRLEN+1]; /* Path text kept for the error report */
    int newline = 1;    /* previous chunk ended a line (shadows the global line terminator) */
    int header = 1;     /* still inside the header part */
    int ispath = 0;     /* previous header line belonged to Path */
    int path_ok = -1;   /* -1 until a Path line has been checked */
    char *path_line = 0;
    long len;
    long bytecount = 0L; /* bytes stored in temp_store so far */

    line[NNTP_STRLEN] = '\0'; /* better safe than sorry */

    /* fetch the article, header and body */
    for (;;) {
	if (!get_server_msg(line, NNTP_STRLEN)) {
	    /* timeout: simply give up */
	    return 0;
	}
	len = strlen(line);
	gross_bytecount += len;

	/* end of article */
	if (newline && line[0]=='.' && (line[1]=='\r' || line[1]=='\n')) break;

	/* process header part, but only at the start of a line */
	if (header && newline && len > 0) {
	    switch (line[0]) {
	    case '\r':
	    case '\n':
		/* empty line: the body starts here */
		header = 0;
		break;
	    case 'P':
		if (is_tag(line,"Path:")) {
		    /*
		     *  keep a private copy for the error report: `line'
		     *  is overwritten by every following read, so a
		     *  pointer into it would be stale by the time the
		     *  report is written. only this first Path line is
		     *  kept, without its line end
		     */
		    long n = len > 5 ? len-5 : 0;

		    if (n > NNTP_STRLEN) n = NNTP_STRLEN;
		    memcpy(path_copy, line+5, n);
		    while (n > 0 && (path_copy[n-1]=='\r' 
				  || path_copy[n-1]=='\n')) --n;
		    path_copy[n] = '\0';
		    path_line = path_copy;

		    path_ok = traverse_path(line+5);
		    ispath = 1;
		    break;
		}
		/* some other header starting with 'P': fall through */
	    default:
		ispath = 0;
		break;
	    case ' ':
	    case '\t':
		if (ispath) {
		    /* BUG: can path really be on a continue line? */
		    path_ok |= traverse_path(line+1);
		}
		break;
	    }
	}

	/* find and strip newlines */
	newline = 0;
	while (len > 0 && (line[len-1]=='\r' || line[len-1]=='\n')) {
	    /* fix by: "J. Richard Sladkey" <jrs@foliage.com> */
	    if (line[len-1]=='\n') newline=1;
	    --len;
	}

	if (bytecount+len+newline > temp_len) {
	    /*
	     *  there is not enough room: grow in TEMP_STEP increments.
	     *  the result goes into a temporary so that a failed
	     *  realloc leaves temp_store and temp_len consistent
	     *  for the next article
	     */
	    long new_len = temp_len;
	    char *grown;

	    do {
		new_len += TEMP_STEP;
	    } while (bytecount+len+newline > new_len);

	    grown = temp_store ? realloc(temp_store,new_len)
			       : malloc(new_len);
	    if (!grown) {
		log_msg(L_ERR,"out of memory");
		return 0;
	    }
	    temp_store = grown;
	    temp_len = new_len;
	}

	if (len > 0) memcpy(temp_store+bytecount,line,len);
	bytecount += len;
	if (newline) temp_store[bytecount++] = '\n';
    }

    /* see if the path is suspect */
    if (!nopath_opt && path_ok != 1) {
	if (path_line) {
	    /* specified Path is wrong */
	    log_msg(L_ERR,"ARTICLE %ld did not contain '%s' in 'Path:'", 
					where, get_exclusion());
	    log_msg(L_ERR,"Path line was '%s'", path_line);
	    /* BUG: here and elsewhere: say which newsgroup, for Pete's sake... */
	} else {
	    /* no Path specified */
	    if (!add_header || strncmp(add_header,"Path",4)!=0) {
		log_msg(L_ERR,"ARTICLE %ld did not contain a 'Path:'", where);
	    }
	}
	return 0;
    }

    /* last argument tells write_incoming() whether a Path was seen */
    if (!write_incoming(temp_store,bytecount,(path_ok >= 0))) return 0;

    bytes_in_spool += bytecount;
    net_bytecount += bytecount;
    ++fetched_articles;

    return 1;
}

/*
 *  handle the response to an ARTICLE request
 *
 *  status: response line from the server
 *  where:  article number the request was issued for
 *
 *  on OK_ARTICLE the article text is read with read_article();
 *  a "no such article" response is logged and otherwise ignored.
 *  latest_where is updated in both of these cases
 *
 *  return false if no point in continuing
 */
static int 
handle_article(char *status, long where)
{
    int ok;
    long a;
    char *endptr;

    switch (strtoul(status,&endptr,10)) {
    case OK_ARTICLE:                    /* article OK */
	/* verify article number against the outstanding request */
	a = strtoul(endptr,&endptr,10);
	if (a != where) {
	    if (a==0 && window <= 1) {
		/* OK, we'll allow it - presumably a non-conforming server */
		static int told = 0;    /* report this only once */

		if (!told) {
		    log_msg(L_INFO,"no article number in ARTICLE response: %s",
								       status);
		    told = 1;
		}
	    } else {
		log_msg(L_ERR,"ARTICLE %ld out of phase: %s", where, status);
		return 0;
	    }
	}
	if ((ok = read_article(where))) {
	    /* article is presumably OK */
	    latest_where = where;

	    /* show that something is happening */
	    if (debug_opt == 2) show_activity();
	}
	return ok;

    case ERR_NOARTIG:                   /* no such article in group */
    case ERR_NOART:                     /* no such article */
	/* BUG: message does not contain an article number... */
	/* article has disappeared, ignore it */
	log_msg(L_DEBUG,"article in %s disappeared: %s",cur_group,status);
	latest_where = where;

	return 1;

    default:                            /* otherwise, protocol error */
	log_msg(L_ERR,"NNTP article read error: got \"%s\"", status);
	/* stop here */
	return 0;
    }
}

/*
 *  queue an ARTICLE request for the given article number;
 *  the response will be taken care of by handle_article()
 *  return whatever put_request() returns
 */
static int 
current_article(long where)
{
    char buf[NNTP_STRLEN+1];

    /* fetch_article() returns before updating the title when there is no STAT */
    if (no_stat) {
	sprintf(buf, " %ld", where);
	progtitle2(cur_group, buf);
    }

    /* this request names an article number */
    latest_contains_msgnum = 1;

    sprintf(buf, "ARTICLE %ld%s", where, newline);
    return put_request(buf,handle_article,where);
}

/*
 *  decide what to do with a message ID reported by STAT or NEXT
 *
 *  the article is skipped (and counted) if it is found in the
 *  history database, has been seen already, or is caught by the
 *  message ID filter; otherwise an ARTICLE request is queued
 */
static int 
process_msgid(char *msgid,long where)
{
    int skip = 0;

    /* the three checks are tried strictly in this order */
    if ((!history || history[0]) && history_lookup(msgid)) {
	/* already in news history database */
	++history_articles;
	skip = 1;
    } else if (!new_msgid(msgid)) {
	/* read already */
	++already_articles;
	skip = 1;
    } else if (do_mfilter(msgid)) {
	/* triggered by message ID filter */
	++mfilter_articles;
	skip = 1;
    }

    if (skip) {
	latest_where = where;
	return 1;
    }
    /* BUG: have max file size?? */

    log_msg(L_DEBUGMORE,"fetching article %ld",where);
    return current_article(where);
}

/*
 *  handle the response to a STAT request
 *
 *  status: response line from the server, or the fake "99"
 *          made up by fetch_article() when STAT is not available
 *  where:  article number the request was issued for
 *
 *  return false if no point in continuing
 */
static int 
handle_stat(char *status,long where)
{
    char id[NNTP_STRLEN+1];
    long num;
    char *rest;
    unsigned long code = strtoul(status,&rest,10);

    if (code == OK_NOTEXT) {
	/* 223 3800 <jeqk9rzgqa4.fsf@storm.stud.ntnu.no> status */
	if (sscanf(rest,"%ld %[^ \n\t]",&num,id) != 2) {
	    log_msg(L_ERR,"bad STAT reponse: %s", status);
	    return 0;
	}
	if (num != where) {
	    log_msg(L_ERR,"STAT %ld out of phase: %s", where, status);
	    return 0;
	}
	stat_failures = 0;

	return process_msgid(id,num);
    }

    if (code == ERR_COMMAND || code == 99) {
	if (code == ERR_COMMAND) {
	    /* STAT is not implemented: remember that for later articles */
	    log_msg(L_INFO,"server lacks STAT command: %s",status);
	    no_stat = 1;
	}
	/* real or dummy: without STAT the article is simply fetched */
	log_msg(L_DEBUGMORE,"unconditionally fetching article %ld",where);
	return current_article(where);
    }

    if (code == ERR_NOARTIG || code == ERR_NOART) {
	/* article no longer there, ignore it */
	++stat_failures;
	log_msg(L_DEBUGMORE,"article %ld in %s not on server",where,cur_group);
	return 1;
    }

    /* otherwise, protocol error: stop here */
    log_msg(L_ERR,"NNTP unknown STAT error: got \"%s\"", status);
    return 0;
}

/*
 *  handle the response to a NEXT request
 *
 *  status: response line from the server
 *  where:  article number that was wanted when NEXT was issued
 *
 *  return false if no point in continuing
 */
static int 
handle_next(char *status,long where)
{
    char buf[NNTP_STRLEN+1];    /* message ID, or the fallback request */
    long num;
    char *rest;
    unsigned long code;

    /* one NEXT less to wait for, whatever the answer is */
    next_requests -= 1;

    code = strtoul(status,&rest,10);
    switch (code) {
    case OK_NOTEXT:
	/* 223 3800 <jeqk9rzgqa4.fsf@storm.stud.ntnu.no> */
	if (sscanf(rest,"%ld %[^ \n\t]",&num,buf) == 2) {
	    latest_next = num;
	    return process_msgid(buf,num);
	}
	log_msg(L_ERR,"bad NEXT reponse: %s", status);
	/* try to recover in a fashion: give up NEXT, go back to STAT */
	no_next = 1;
	latest_contains_msgnum = 1;
	sprintf(buf, "STAT %ld%s", where,newline);
	return put_request(buf,handle_stat,where);

    case ERR_COMMAND:
	/* NEXT is not implemented */
	log_msg(L_INFO,"server lacks NEXT command: %s",status);
	no_next = 1;
	log_msg(L_DEBUGMORE,"unconditionally fetching article %ld",where);
	return current_article(where);

    case ERR_NONEXT:
    case ERR_NOARTIG:
    case ERR_NOART:
	/* either way, NEXT cannot take us any further */
	next_at_witts_end = 1;
	if (code == ERR_NONEXT) {
	    /* the regular "no next article" answer */
	    log_msg(L_DEBUGMORE,"NEXT reports no more articles in %s",cur_group);
	} else {
	    /* "no such article" is not expected as an answer to NEXT */
	    log_msg(L_ERR,"NNTP unfamiliar NEXT reponse: got \"%s\"", status);
	}
	return 1;

    default:
	/* otherwise, protocol error: stop here */
	log_msg(L_ERR,"NNTP unknown NEXT error: got \"%s\"", status);
	return 0;
    }
}

/*
 *  fetch an article in current group
 *
 *  where: number of the article wanted
 *  first: when set, the NEXT/STAT bookkeeping is reset before
 *         anything else (presumably set for the first article
 *         requested in a group - confirm against the caller)
 *
 *  normally a STAT is queued for the article. once stat_failures
 *  has reached stat_attempts (and NEXT has not been ruled out by
 *  no_next) a NEXT is queued instead. if the server is known to
 *  lack STAT, a dummy STAT response is handled right away, which
 *  leads to an ARTICLE request
 *
 *  return false if no point in continuing
 */
int 
fetch_article(long where,int first)
{
    char request[NNTP_STRLEN+1];
    char status[NNTP_STRLEN+1];

    if (first) {
	/* start with a clean slate */
	next_requests = 0;
	stat_failures = 0;
	latest_contains_msgnum = next_at_witts_end = 0;
	latest_next = -1;
    }

    if (where <= latest_next) {
	/* no need: NEXT has already skipped beyond this point */
	return 1;
    }

    if (no_stat) {
	sprintf(status,"%d",99);
	/* fake dummy STAT message that will cause an ARTICLE */
	return handle_stat(status,where);
    }

    /* enquire article status and message ID */
    /* (request is first borrowed to show the article number in the title) */
    sprintf(request, " %ld", where);
    progtitle2(cur_group, request);

    if (!first && !no_next && stat_failures >= stat_attempts) {
	if (next_at_witts_end) {
	    /* nothing more to do */
	    return 1;
	}
	while (latest_contains_msgnum && next_requests > 0) {
	    /*
	     *  there are unanswered NEXT requests in the queue,
	     *  followed by an ARTICLE or STAT. this means we must
	     *  wait until we get the answer from the last of
	     *  the NEXT requests. also, a successful STAT response
	     *  will cause us to reconsider re. the NEXT request
	     */
	    int n;
	    log_msg(L_DEBUGMORE,"wait before issuing NEXT");
	    /* a zero result from process_request() means give up */
	    if (!(n = process_request())) return 0;

	    if (stat_failures < stat_attempts) {
		/* a good STAT has arrived, so skip the NEXT */
		log_msg(L_DEBUGMORE,"a good STAT arrived, do STAT instead");
		goto issue_a_stat;
	    }

	    if (n < 0) break; /* queue is empty */
	}
	/* NEXT carries no article number, hence the flag is cleared */
	sprintf(request, "NEXT%s", newline);
	++next_requests;
	latest_contains_msgnum = 0;
	return put_request(request,handle_next,where);
    }

    /* issue a standard STAT */
  issue_a_stat:
    sprintf(request, "STAT %ld%s", where,newline);
    latest_contains_msgnum = 1;
    return put_request(request,handle_stat,where);
}

/*
 *  interpret the response to a GROUP request
 *
 *  status: response line from the server
 *  group:  name of the group, for error reports
 *  firstp, lastp: receive the second and third number of
 *          a successful response
 *
 *  return 1 if OK, 0 if no group, -1 if no point in continuing
 */
static int 
handle_group(char *status,char *group,long *firstp,long *lastp)
{
    long count;     /* first number of the response; parsed but not used */
    char *rest;
    int result;

    switch (strtoul(status,&rest,10)) {
    case OK_GROUP:
	/* group selected */
	if (sscanf(rest,"%ld %ld %ld",&count,firstp,lastp) == 3) {
	    ++fetched_groups;
	    result = 1;
	} else {
	    log_msg(L_ERR,"group select bad format: \"%s\"", status);
	    result = -1;
	}
	break;

    case ERR_NOGROUP:
	/* server haven't seen it before */
	log_msg(L_ERR,"server does not carry group \"%s\"", group);
	result = 0;
	break;

    case ERR_NOAUTH:
	/* server won't allow us in here */
	log_msg(L_ERR,"authorization required for group \"%s\"", group);
	result = 0;
	break;

    default:
	/* otherwise, protocol error */
	log_msg(L_ERR,"NNTP group select protocol error: got \"%s\"", status);
	result = -1;
	break;
    }
    return result;
}

/*
 *  select a group              
 *  return 1 if OK, 0 if no group, -1 if no point in continuing
 */
int 
select_group(char *group,long *firstp,long *lastp)
{
    char request[NNTP_STRLEN+1];
    char status[NNTP_STRLEN+1];

    strncpy(cur_group,group,sizeof(cur_group)-1); /* for error reporting */
    progtitle(cur_group);

    sprintf(request, "GROUP %s%s", group,newline);

    if (!put_server(request)) return -1;

    /* get status */
    if (!get_server_nntp(status, sizeof(status))) return -1;

    return handle_group(status,group,firstp,lastp);
}

