/*
	proxyscan.c
	August 2010
	(C)Copyright 2010 Eric Shalov. All Rights Reserved.
	
	An asynchronous TCP port scanner / proxy locator
	for MacOS/X (Tested on 10.4 and 10.5.8) and Linux (Tested on 2.6)
	
	Single-threaded scanner, uses select() to simultaneously
	scan lotsa ports, spawns a check_proxy() function on ports that
	connect to check if they're open proxies.
*/

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>

#include <fcntl.h>
#include <sys/select.h>
#include <sys/time.h>

#ifdef _LINUX_TYPES_H
#include <sys/vfs.h>
#endif

#ifdef _DARWIN_C_SOURCE
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#endif

/* Life-cycle states of one entry in the connection table
   (struct active_connection.state). */
/* Entry has a fresh target assigned by new_connection(); no socket yet. */
#define STATE_INITIALIZED	0
/* Non-blocking connect() has been issued and has not completed yet. */
#define STATE_ATTEMPTING	1
/* Terminal: connect() did not complete within connection_timeout_secs. */
#define STATE_TIMEOUT		2
/* Terminal: the socket reported ECONNREFUSED. */
#define STATE_REFUSED		3
/* TCP connection is up; the HTTP request has not been sent yet. */
#define STATE_CONNECTED		4
/* HTTP request has been sent; waiting for reply data. */
#define STATE_AWAITING_REPLY	5
/* Terminal: reply had status 200 and contained verification_string. */
#define STATE_SUCCESS		6
/* Terminal: connected, but the reply was missing, wrong, or too late. */
#define STATE_FAILED		7
/* For collecting stats: running totals shown in the header drawn by
   show_actives().  Each counter is incremented in proxyscan() when a
   connection reaches the corresponding terminal state. */
unsigned long proxies_found = 0,
	 connections_failed = 0,
	 connections_refused = 0,
	 ports_timed_out = 0;

/* Size in bytes of the per-connection reply buffer. */
#define BUF_SIZE 1024

/* One slot of the connection table: a single target (ip:port) together
   with the socket and bookkeeping for the attempt currently using it. */
struct active_connection {
	int state;		/* one of the STATE_* values */
	int sd;			/* socket descriptor; new_connection() resets it to -1 */
	unsigned char ip[4];	/* IPv4 target, ip[0] is the first dotted-quad octet */
	int port;		/* TCP port in host byte order */
	/* Timestamps; all three are cleared (zero) by new_connection():
	   attempt_began - when connect() was issued
	   last_tx       - when the HTTP request was sent
	   terminated    - when a terminal state was reached */
	struct timeval attempt_began, last_tx, terminated;
	char buffer[BUF_SIZE];	/* reply bytes received so far */
	int buf_bytes;		/* number of valid bytes in buffer */
};

/* Set by -l: every target becomes 127.0.0.1 instead of a random address. */
int local_mode = 0;
/* Enables the diagnostic printf()s; no command-line option sets it. */
int debug = 0;

/* Number of entries in the connection table (-c). */
int num_actives = 250;
/* Seconds a connect() may stay pending before it is given up (-t). */
int connection_timeout_secs = 2;
/* Seconds to wait for reply data after the request was sent (-h). */
int http_reply_timeout = 90;
unsigned long refresh_us = 0; /* throttle rate, delay in microseconds before reusing a connection */
unsigned long select_timeout_secs   =      0; /* max time spent in select() */
unsigned long select_timeout_us     = 500000; /* max time spent in select() */
unsigned long redraw_frequency_secs =      2; /* minimum delay between redraws */
unsigned long redraw_frequency_us   =      0; /* minimum delay between redraws */
/* The connection table: num_actives entries, allocated in main(). */
struct active_connection *active;
/* Candidate ports; new_connection() picks one of these at random. */
int ports_to_scan[] = {80,81,82,90,3128,7000,8000,8080,8081,9000};
/* URL requested through each candidate proxy. */
char *url = "http://www.nrsc.org/";
/* Text that must appear in a 200 reply for it to count as a real proxy. */
char *verification_string = "NRSC";

/* prototypes */
/* Redraw the statistics header and the first rows of the connection table. */
void show_actives();
/* Reset table entry i and assign it a new target address and port. */
void new_connection(int i);
/* One-minute system load average, or -1.0 when it cannot be determined. */
float load_avg();
/* Append the address and port of table entry i to proxyscan.dat. */
void save_proxy(int i);
/* The scan loop itself. */
int proxyscan();


/* Parse the argument of option -opt as a decimal integer that is at least
   min_value and fits in an int.  Prints a diagnostic and exits on bad input
   (atoi() would silently turn garbage into 0). */
static int parse_int_option(const char *prog, int opt, const char *text, int min_value) {
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if(end == text || *end != '\0' || errno == ERANGE ||
	   value < min_value || (long)(int)value != value) {
		fprintf(stderr,
			"%s: invalid value '%s' for -%c (expected an integer >= %d)\n",
			prog, text, opt, min_value);
		exit(1);
	}
	return (int)value;
}

/*
	main(): parse the command line, allocate the connection table and
	run the scanner.

	Options:
		-c connections		number of table entries (at least 1)
		-h http-timeout		seconds to wait for an HTTP reply
		-t connection-timeout	seconds to wait for connect()
		-l			local mode (scan 127.0.0.1 only)

	Exits with status 1 on a usage error, an invalid option value or
	an allocation failure.
*/
int main(int argc, char *argv[]) {
	int ch;
	
	while ((ch = getopt(argc, argv, "c:h:lt:")) != -1) {
		switch(ch) {
			case 'c':
				/* an empty table would leave the main loop with nothing to do */
				num_actives = parse_int_option(argv[0], ch, optarg, 1);
				break;
			case 'h':
				http_reply_timeout = parse_int_option(argv[0], ch, optarg, 0);
				break;
			case 'l':
				local_mode = 1;
				break;
			case 't':
				connection_timeout_secs = parse_int_option(argv[0], ch, optarg, 0);
				break;
			default:
				fprintf(stderr,
					"%s: %s [-c connections] [-h http-timeout] [-t connection-timeout] [-l]\n"
					"\t-l for local-mode\n",
					argv[0], argv[0]);
				exit(1);
		}
	}

	if( num_actives > 250 && geteuid() != 0 ) {
		fprintf(stderr,
			"%s: Must be root to use more than 250 connections.\n", argv[0]);
		exit(1);
	}

	/* calloc() also guards the count * size multiplication against overflow */
	active = calloc((size_t)num_actives, sizeof *active);
	if(active == NULL) {
		fprintf(stderr,
			"%s: cannot allocate a table of %d connections.\n", argv[0], num_actives);
		exit(1);
	}

	proxyscan();
	
	free(active);
	
	return 0;
}

int proxyscan() {
	int   highest_sd;
	int   rval; /* socket descriptor for connect */
	
	int err_val;
	socklen_t opt_len;

	struct hostent *hostaddr;   //To be used for IPaddress
	struct sockaddr_in servaddr;   //socket structure
	
	fd_set read_fds, write_fds, err_fds;
	struct timeval timer, elapsed, now;
	
	char ip_as_string[16];
	int i;
	
	char request[1024];

	int bytes_in;	
	
	int http_status;

	struct timeval termination_elapsed;
	struct timeval last_redraw, redraw_elapsed, redraw_frequency;

	/* Initialize random-number generator */
	gettimeofday(&now, NULL);
	srand(now.tv_usec);
	
	/* Fill the active scanning table */
	for(i=0;i<num_actives;i++) new_connection(i);

	setbuf(stdout,NULL);

	timerclear(&last_redraw);
	printf("\033[2J");
	printf("\033[23H"
		"Test URL: %s\n",
		url);

	/* Main loop:
	   	-initialize any REFUSED, SUCCESS, or TIMEOUT connections
	   	-connect() asynchronously to initialized sockets
	   	-select() on operating sockets
	   	-update states
	*/
	while(1) {
		gettimeofday(&now, NULL);
		
		/* Check if it's time yet to do a screen redraw */
		timersub(&now, &last_redraw, &redraw_elapsed);
		redraw_frequency.tv_sec  = redraw_frequency_secs;
		redraw_frequency.tv_usec = redraw_frequency_us;
		
		if( timercmp(&redraw_elapsed, &redraw_frequency, >) ) {
			gettimeofday(&last_redraw, NULL);
			show_actives();
			
			if( load_avg() > 20.0 ) {
				printf("Whoa, the system's getting busy, shutting down.\n");
				exit(1);
			}
		}

		/* Look for any terminated entries in connection table and
		   re-initialize them with fresh ports to search.
		*/
		gettimeofday(&now, NULL);
		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_SUCCESS ||
			   active[i].state == STATE_TIMEOUT ||
			   active[i].state == STATE_REFUSED ||
			   active[i].state == STATE_FAILED) {   
			   
			   	timersub(&now, &active[i].terminated, &termination_elapsed);
			   	
			   	/* Throttle the scan */
			   	if(termination_elapsed.tv_usec >= refresh_us)
				   	new_connection(i);
			}
			
		}
		

		/* Start connection attempt for any ready (initialized) connections: */
		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_INITIALIZED) {   
				active[i].sd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); //created the tcp socket
				fcntl(active[i].sd, F_SETFL, O_NONBLOCK);
				if (active[i].sd == -1) {
					if(errno == EBADF) {
						fprintf(stderr,
							"You must increase the process file descriptor limit with:\n"
							"   # ulimit -n %d\n"
							"before running the process.\n",
							num_actives+5);
						exit(1);
					} else {
						perror("socket()\n");
						return (errno);
					}
				}

				/* set hostaddr */
				memset( &servaddr, 0, sizeof(servaddr));
				servaddr.sin_family = AF_INET;
				servaddr.sin_port = htons(active[i].port); /* set the portno */
				sprintf(ip_as_string,"%d.%d.%d.%d",
					active[i].ip[0], active[i].ip[1],
					active[i].ip[2], active[i].ip[3]);
				hostaddr = gethostbyname(ip_as_string); /* get the ip 1st argument */
				memcpy(&servaddr.sin_addr, hostaddr->h_addr, hostaddr->h_length);
				
				if(debug) printf("Connecting to %s:%d...\n\n", ip_as_string, active[i].port);
				rval = connect(active[i].sd, (struct sockaddr *) &servaddr, sizeof(servaddr));
				active[i].state = STATE_ATTEMPTING;
				gettimeofday(&active[i].attempt_began, NULL);

			}
		}


		/* setup the select(). select on all connections
		   that are STATE_ATTEMPTING or STATE_AWAITING_REPLY */
		FD_ZERO(&read_fds);
		FD_ZERO(&write_fds);
		FD_ZERO(&err_fds);
		highest_sd = 0;
		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_ATTEMPTING || active[i].state == STATE_AWAITING_REPLY) {
				FD_SET(active[i].sd, &read_fds);
				FD_SET(active[i].sd, &write_fds);
				FD_SET(active[i].sd, &err_fds);
				if(active[i].sd > highest_sd) highest_sd = active[i].sd;
			}
		}
		timer.tv_sec  = select_timeout_secs;
		timer.tv_usec = select_timeout_us;
			
		if(debug) printf("select() returned:\n");
		select(highest_sd+1, &read_fds, &write_fds, &err_fds, &timer);

		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_ATTEMPTING || active[i].state == STATE_AWAITING_REPLY) {
				if( FD_ISSET(active[i].sd, &write_fds) ) {
					if(debug) printf(", %d", i);
				}
			}
		}
		if(debug) printf(".\n");

		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_AWAITING_REPLY) {
				if( FD_ISSET(active[i].sd, &read_fds) ) {
					if(debug) printf("DATA READY ON CONNECTION %d!\n", i);
					if(active[i].buf_bytes < BUF_SIZE);
					bytes_in = read(active[i].sd, active[i].buffer + active[i].buf_bytes, BUF_SIZE - active[i].buf_bytes);
					if(bytes_in > 0) {
						active[i].buf_bytes += bytes_in;
						if(debug) printf("%d BYTES READ IN ON CONN %d, CONN BUFFER NOW HAS %d BYTES\n",
							bytes_in, i, active[i].buf_bytes);

						if(active[i].buf_bytes < BUF_SIZE)
							active[i].buffer[active[i].buf_bytes] = '\0';
						else active[i].buffer[BUF_SIZE-1] = '\0';
						
						if(debug) printf("buffer = [%s]\n", active[i].buffer);
						printf("\033[23;1H\033[K"
							"Received %d bytes from %u.%u.%u.%u:%d:\n"
							"\033[24;1H\033[K",
							active[i].buf_bytes,
							active[i].ip[0], active[i].ip[1], active[i].ip[2], active[i].ip[3],
							active[i].port);
						{
							int chars_printed = 0;
							int p;
							for(p=0;p<active[i].buf_bytes && chars_printed<78;p++)
								if(active[i].buffer[p] == '\n') {
									printf("/");
									++chars_printed;
								}
								else if(active[i].buffer[p] >= ' ' && active[i].buffer[p] <= '~') {
									printf("%c", active[i].buffer[p]);
									++chars_printed;
								}
						}
						printf("\033[1;1H\n");

						/* Look for an HTTP reply status code */
						if( sscanf(active[i].buffer,"HTTP/1.0 %d\n", &http_status) == 1) {
							if(http_status == 200) {
								if(strstr(active[i].buffer, verification_string)) {
									if(debug) printf("Connection %d: SUCCESS, PROXY FOUND AT %d.%d.%d.%d:%d!\n",
										i, active[i].ip[0], active[i].ip[1], active[i].ip[2], active[i].ip[3],
										active[i].port);
									active[i].state = STATE_SUCCESS;
									gettimeofday(&active[i].terminated, NULL);
									close(active[i].sd);
									++proxies_found;
									/* Add proxy to list... */
									save_proxy(i);
								} else {
									/* dumb access points and stuff like to give back an HTTP 200
									   even though they're not giving you what you asked for */
									if(debug) printf("Connection %d: Not actually proxying for us.\n", i);
									active[i].state = STATE_FAILED;
									gettimeofday(&active[i].terminated, NULL);
									close(active[i].sd);
									++connections_failed;
								}
							} else {
								/* http reply code indicates failure */
								if(debug) printf("Connection %d: Undesired HTTP reply code: %d.\n", i, http_status);
								active[i].state = STATE_FAILED;
								gettimeofday(&active[i].terminated, NULL);
								close(active[i].sd);
								++connections_failed;
							}
						} else {
							/* no http-reply code found */
							if(debug) printf("Connection %d: No HTTP reply code found.\n", i);
							active[i].state = STATE_FAILED;
							gettimeofday(&active[i].terminated, NULL);
							close(active[i].sd);
							++connections_failed;
						}
					} else {
						if(debug) printf("Connection %d closed.\n", i);
						active[i].state = STATE_FAILED;
						gettimeofday(&active[i].terminated, NULL);
						close(active[i].sd);
						++connections_failed;
					}
				}
			}
		}


		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_ATTEMPTING ||
			   active[i].state == STATE_CONNECTED ||
			   active[i].state == STATE_AWAITING_REPLY) {
				opt_len = sizeof(err_val);
				getsockopt(active[i].sd, SOL_SOCKET, SO_ERROR, &err_val, &opt_len);
				
				if(err_val == ECONNREFUSED) {
					if(debug) printf("connection %d: Connection refused.\n", i);
					active[i].state = STATE_REFUSED;
					/* even though it ECONNREF'd, we have to close() to free the descriptor */
					close(active[i].sd);
					gettimeofday(&active[i].terminated, NULL);
					++connections_refused;
				}
				
				else {
					if(debug) printf("connection %d: state = %d, sock err val = %d\n", i, active[i].state, err_val);
				}

				if(active[i].state == STATE_ATTEMPTING && err_val == 0) {
					getpeername(active[i].sd, NULL, NULL);
					if(errno == EFAULT) {
					// printf("errno = %d\n", errno);
						if(debug) printf("connection %d CONNECTED!\n", i);
						active[i].state = STATE_CONNECTED;
					}
				}
				
			}
		}

		/* check for any old connections-in-progress that we'll time out */
		gettimeofday(&now, NULL);
		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_ATTEMPTING) {
				timersub(&now, &active[i].attempt_began, &elapsed);
				if( elapsed.tv_sec > connection_timeout_secs) {
					gettimeofday(&active[i].terminated, NULL);
					close(active[i].sd);
					active[i].state = STATE_TIMEOUT;
					++ports_timed_out;
				}
			}
		}

		for(i=0;i<num_actives;i++) {
			if(active[i].state == STATE_CONNECTED) {
				if(debug) printf("connection %d: Sending request\n", i);
				sprintf(request,"GET %s HTTP/1.0\r\n\r\n", url);
				send(active[i].sd, request, strlen(request), 0);
				gettimeofday(&active[i].last_tx, NULL);
				active[i].state = STATE_AWAITING_REPLY;
			}
			
			gettimeofday(&now, NULL);
			timersub(&now, &active[i].last_tx, &elapsed);
			if(active[i].state == STATE_AWAITING_REPLY && elapsed.tv_sec > http_reply_timeout) {
				gettimeofday(&active[i].terminated, NULL);
				close(active[i].sd);
				active[i].state = STATE_FAILED;
				++connections_failed;
			}
		}
		
		usleep(100000); /* cpu throttle, cooperative multitasking */
	}
}

/* Short label (at most 7 characters) for a STATE_* value; "ERR" for
   anything that is not a known state. */
static const char *state_label(int state) {
	switch(state) {
		case STATE_INITIALIZED:    return "INIT";
		case STATE_ATTEMPTING:     return "TRYING";
		case STATE_TIMEOUT:        return "TIMEOUT";
		case STATE_REFUSED:        return "REFUSED";
		case STATE_CONNECTED:      return "CONNECT";
		case STATE_AWAITING_REPLY: return "SENTREQ";
		case STATE_SUCCESS:        return "SUCCESS";
		case STATE_FAILED:         return "FAILED";
		default:                   return "ERR";
	}
}

/* Print one elapsed-time column: the time from *stamp to *now as
   "sss.mmms", or "--NULL--" when the timestamp was never set. */
static void print_elapsed(const struct timeval *stamp, const struct timeval *now) {
	struct timeval delta;

	if(!timerisset(stamp)) {
		printf("--NULL--");
		return;
	}
	timersub(now, stamp, &delta);
	/* time_t has no printf conversion of its own, so convert explicitly */
	printf("%3ld.%03ds", (long)delta.tv_sec, (int)(delta.tv_usec/1000));
}

/*
	show_actives(): redraw the status screen from the top-left corner.

	Prints a three-line header with the load average and the running
	counters, followed by a table of the first 15 connection table
	entries (fewer if the table is smaller) showing target, state and
	the time since each entry's three timestamps.
*/
void show_actives() {
	int i;
	struct timeval now;
	int max_show = 15;

	gettimeofday(&now, NULL);

	/* the counters are unsigned long, hence %lu */
	printf("\033[1;1H"
	       "+---------------+  Load Average: %0.2f | Connection table: %d entries\n"
	       "| PROXY SCANNER |  Proxies Found: %3lu | Connected but failed to proxy: %6lu\n"
	       "+---------------+  Timed-out:  %6lu | Connection Refused: %6lu (%5.1f%%)\n",
	        load_avg(),
	        num_actives,
		proxies_found, connections_failed, ports_timed_out, connections_refused,
		/* the 1+ keeps the denominator non-zero before anything finished */
		100.0*connections_refused/(1+proxies_found+connections_failed+ports_timed_out+connections_refused));
		
	printf( "+------+-----------------+-------+---------+----------+----------+----------+\n"
		"| Conn | IP              | Port  | Status  | Started  | Last TX  | Stopped  |\n"
		"+------+-----------------+-------+---------+----------+----------+----------+\n");
	
	for(i=0;i<num_actives && i<max_show;i++) {
		printf("| %04d | %03u.%03u.%03u.%03u | %05d | %-7s | ",
			i,
			(unsigned)active[i].ip[0],
			(unsigned)active[i].ip[1],
			(unsigned)active[i].ip[2],
			(unsigned)active[i].ip[3],
			active[i].port,
			state_label(active[i].state)
		);

		print_elapsed(&active[i].attempt_began, &now);
		printf(" | ");
		print_elapsed(&active[i].last_tx, &now);
		printf(" | ");
		print_elapsed(&active[i].terminated, &now);
		printf(" |\n");
	}
	printf("+------+-----------------+-------+---------+----------+----------+----------+\n");
}

/* Uniformly random index in 0 .. count-1.  Dividing by RAND_MAX + 1.0 keeps
   the fraction strictly below 1, so the result can never equal count. */
static size_t random_index(size_t count) {
	return (size_t)(count * (rand() / (RAND_MAX + 1.0)));
}

/*
	new_connection(): reset table entry i and give it a fresh target.

	The entry goes back to STATE_INITIALIZED with no socket, cleared
	timestamps and an empty reply buffer.  In local mode the target is
	127.0.0.1; otherwise the first octet is drawn from busiest_a and
	the remaining octets are random.  The port is drawn from
	ports_to_scan.
*/
void new_connection(int i) {
	static const unsigned char busiest_a[] = {76,64,207,114,72,67,74,77,58,66,
		65,38,189,119,69,173,208,124,123,61,220};

	active[i].state = STATE_INITIALIZED;
	active[i].sd = -1;

	if(local_mode) {	
		active[i].ip[0] = 127;
		active[i].ip[1] = 0;
		active[i].ip[2] = 0;
		active[i].ip[3] = 1;
	} else {
		/* index by the real element count so every entry can be chosen */
		active[i].ip[0] = busiest_a[random_index(sizeof(busiest_a)/sizeof(busiest_a[0]))];
		active[i].ip[1] = (254.0*rand()/RAND_MAX);
		active[i].ip[2] = (254.0*rand()/RAND_MAX);
		active[i].ip[3] = 2+(251.0*rand()/RAND_MAX);	/* 2..253 */
	}
	
	/* The previous scaling by RAND_MAX produced an index one past the end
	   of ports_to_scan whenever rand() returned RAND_MAX. */
	active[i].port = ports_to_scan[random_index(sizeof(ports_to_scan)/sizeof(ports_to_scan[0]))];

	timerclear(&active[i].attempt_began);
	timerclear(&active[i].last_tx);
	timerclear(&active[i].terminated);
	active[i].buf_bytes = 0;
}


/*
	load_avg(): one-minute system load average.

	Linux: first field of /proc/loadavg.
	Darwin: sysctl(CTL_VM, VM_LOADAVG).

	Returns -1.0 when the value cannot be obtained, including on
	platforms that match neither branch.
*/
float load_avg() {
#if defined(__linux__) || defined(_LINUX_TYPES_H)
  /* __linux__ is predefined by the compiler; _LINUX_TYPES_H is only set
     when some header happens to have pulled in <linux/types.h>. */
  FILE *loadavg;
  float one;
  int fields;

  /* 0.54 0.23 0.19 2/141 6509 */
  if( !(loadavg=fopen("/proc/loadavg","r")) ) return -1.0f;

  fields = fscanf(loadavg,"%f",&one);
  fclose(loadavg);

  /* without a successful conversion `one` holds no value */
  return (fields == 1) ? one : -1.0f;
#elif defined(_DARWIN_C_SOURCE)
    int mib[2];
    struct loadavg la;
    size_t size;

    mib[0] = CTL_VM;
    mib[1] = VM_LOADAVG;
    size = sizeof(struct loadavg);
    
    if( sysctl(mib, 2, &la, &size, NULL, 0) < 0 ) {
      if(debug) printf("load_avg(): sysctl() fails to report load average.\n");
      return -1.0f;
    }
    /* ldavg[] is fixed-point, scaled by fscale */
    return (float)((double)la.ldavg[0] / (double)la.fscale);
#else
    return -1.0f;
#endif
}

/* save_proxy(): append one record for connection table entry i to
   "proxyscan.dat".  The record has the form "a.b.c.d,port,<ctime() text>".
   When the file cannot be opened nothing is written. */
void save_proxy(int i) {
	time_t found_at = time(NULL);
	FILE *out = fopen("proxyscan.dat", "a+");

	if(out == NULL)
		return;

	fprintf(out, "%u.%u.%u.%u,%d,%s",
		active[i].ip[0], active[i].ip[1],
		active[i].ip[2], active[i].ip[3],
		active[i].port,
		ctime(&found_at));
	fclose(out);
}

