--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Write the program banner and the command-line synopsis to stdout,
+ * then terminate the process with exit status 0.
+ */
+static void
+usage() {
+    fputs("Clear text from lib.ru for Pocket PC, Version 0.2.\n", stdout);
+    fputs("Author: Teodor Sigaev <teodor@sigaev.ru>\n", stdout);
+    fputs("Usage:\n clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n", stdout);
+    exit(0);
+}
+
+/*
+ * NULL-terminated list of lower-case tag names that is_rtag() searches.
+ * main() discards a tag found here together with everything that follows
+ * it up to the matching closing tag.
+ */
+char *RemoveTag[] = { "select", "head", "div", "a", "form", NULL };
+
+/*
+ * Tell whether the first len bytes of tag spell exactly one of the names
+ * listed in RemoveTag.
+ *
+ * tag does not have to be NUL-terminated and is never written to, so the
+ * caller may pass a completely full buffer without risking a write past
+ * its end.
+ *
+ * Returns 1 on a match, 0 otherwise (including tag == NULL or len < 0).
+ */
+static int
+is_rtag(const char *tag, int len) {
+    char **ptr;
+
+    if ( tag == NULL || len < 0 )
+        return 0;
+
+    for ( ptr = RemoveTag; *ptr != NULL; ptr++ ) {
+        /* Lengths must agree first, so a prefix such as "di" never matches "div". */
+        if ( strlen(*ptr) == (size_t)len && memcmp(tag, *ptr, (size_t)len) == 0 )
+            return 1;
+    }
+    return 0;
+}
+
+
+/* Value of the option most recently returned by mgetopt(), or NULL when that option takes none. */
+static char *optarg = NULL;
+/* Index into argv of the next argument mgetopt() will look at; argv[0] is skipped. */
+static int current=1;
+
+/*
+ * Minimal getopt-style command-line scanner.
+ *
+ * argn/argv: argument count and vector as received by main().
+ * option:    string of accepted option letters; a letter followed by ':'
+ *            requires a value, which is always taken from the NEXT argv
+ *            element and stored in optarg.
+ *
+ * An argument must start with '-' or '/'. Only the character right after
+ * that prefix is examined; any further characters are ignored.
+ *
+ * Returns the option character, or -1 once all arguments are consumed.
+ * On an unknown option, a non-option argument, or a missing value, a
+ * message is printed to stdout and the process exits with status 1.
+ */
+int
+mgetopt(int argn, char* argv[], char *option) {
+    char *arg;
+    char *ptr;
+    char key;
+
+    if ( current >= argn ) return -1;
+
+    arg = argv[current];
+
+    /* Check the prefix before touching arg[1]: arg may be an empty string. */
+    if ( *arg != '-' && *arg != '/' ) {
+        printf("Unknown option: %s\n", arg);
+        exit(1);
+    }
+
+    key = arg[1];
+    /*
+     * A bare "-" gives key == '\0', which strchr() would "find" at the
+     * terminator of option; ':' is only a marker inside option. Neither
+     * is a real option letter, so reject both up front.
+     */
+    if ( key == '\0' || key == ':' )
+        ptr = NULL;
+    else
+        ptr = strchr( option, key );
+
+    if ( ptr == NULL ) {
+        printf( "Unknown option: %s\n", arg);
+        exit(1);
+    }
+
+    current++;
+    if ( *(ptr+1) == ':' ) {
+        if ( current >= argn ) {
+            printf("No value for -%c\n" ,key);
+            exit(1);
+        }
+        optarg = argv[current];
+        current++;
+    } else {
+        optarg = NULL;
+    }
+
+    return (int)key;
+}
+
+/* States of the character-by-character scanner in main(). */
+enum {
+    INTXT          = 0, /* ordinary text: characters are copied to the output */
+    INTAG          = 1, /* inside a tag that is skipped up to its closing '>' */
+    FINDEND        = 2, /* after a newline: buffering it and any following spaces */
+    INDROPTAG      = 3, /* inside the content of a removed tag, waiting for '<' */
+    INDROPINTAG    = 4, /* saw '<' inside removed content, checking for '/' */
+    INHEADTAG      = 5, /* right after '<': collecting the letters of the tag name */
+    INDROPINCLSTAG = 6  /* after "</" in removed content: matching the stored tag name */
+};
+
+
+/*
+ * Program entry point: strip HTML markup from the input and write the
+ * remaining text to the output.
+ *
+ * Options (parsed with mgetopt):
+ *   -i FILE  read from FILE instead of stdin
+ *   -o FILE  write to FILE instead of stdout
+ *   -l N     threshold for keeping a line break: after a newline, spaces
+ *            are buffered and the break is kept once the buffered newline
+ *            plus spaces exceed N characters (default 4)
+ *   -h, -?   print usage and exit
+ *
+ * Processing, one character at a time:
+ *   - '\r' is dropped in text and while looking at a line break;
+ *   - a tag is replaced by spaces; a tag named in RemoveTag is dropped
+ *     together with everything up to its closing tag;
+ *   - a newline directly followed by text becomes a single space, a
+ *     newline followed by another newline is written through unchanged.
+ *
+ * Returns 0 on success; option and file-open errors exit with status 1.
+ */
+int
+main(int argn, char *argv[]) {
+    int ch;
+    FILE *in=stdin, *out=stdout;
+    int state=INTXT;
+    char buf[8192];
+    /* Highest index that may be stored in buf while leaving one spare byte. */
+    const int bufmax = (int)sizeof buf - 1;
+    int lenbuf=0,closelen=0;
+    int spacelen=4;
+
+    while((ch = mgetopt(argn, argv, "l:i:o:h?"))!=-1) {
+        switch (ch) {
+            case 'i':
+                if ( (in=fopen(optarg, "r"))==NULL) {
+                    printf("Can't open file %s\n", optarg);
+                    exit(1);
+                }
+                break;
+            case 'o':
+                if ( (out=fopen(optarg, "w"))==NULL) {
+                    printf("Can't open file %s\n",optarg);
+                    exit(1);
+                }
+                break;
+            case 'l': {
+                char *end;
+                long val = strtol(optarg, &end, 10);
+
+                if ( end == optarg || *end != '\0' ) {
+                    printf("Invalid value for -l: %s\n", optarg);
+                    exit(1);
+                }
+                if ( val < 0 ) {
+                    printf("-l should be >= 0\n");
+                    exit(1);
+                }
+                /*
+                 * FINDEND stores the newline plus up to spacelen further
+                 * characters in buf, so the threshold must fit in it.
+                 * An out-of-range strtol() result lands here as well.
+                 */
+                if ( val > bufmax ) {
+                    printf("-l should be <= %d\n", bufmax);
+                    exit(1);
+                }
+                spacelen = (int)val;
+                break;
+            }
+            case 'h':
+            case '?':
+            default:
+                usage(); /* does not return */
+                break;
+        }
+    }
+
+    while( (ch=getc(in)) != EOF ) {
+        switch (state) {
+            case INTXT:
+                if ( ch == '<' ) {
+                    state=INHEADTAG;
+                    lenbuf=0;
+                } else if ( ch == '\n' ) {
+                    /* Hold the newline back until we know what follows it. */
+                    state=FINDEND;
+                    buf[0] = (char)ch;
+                    lenbuf=1;
+                } else if ( ch != '\r' ) {
+                    fputc(ch,out);
+                }
+                break;
+
+            case INHEADTAG:
+                if ( isalpha(ch) ) {
+                    /*
+                     * Letters beyond the buffer capacity are ignored; a
+                     * name that long cannot match any RemoveTag entry.
+                     */
+                    if ( lenbuf < bufmax ) {
+                        buf[ lenbuf ] = (char)tolower(ch);
+                        lenbuf++;
+                    }
+                } else if ( ch == '>' ) {
+                    if ( is_rtag(buf,lenbuf) ) {
+                        state = INDROPTAG;
+                        closelen=0;
+                    } else {
+                        state=INTXT;
+                        fputc(' ',out);
+                    }
+                } else if ( lenbuf == 0 && ch != '/' ) {
+                    /* '<' not followed by a name or '/': emit it literally. */
+                    fputc('<',out);
+                    fputc(ch,out);
+                    state=INTXT;
+                } else {
+                    /* Name ended by something other than '>' (attributes, '/'). */
+                    if ( is_rtag(buf,lenbuf) ) {
+                        state = INDROPTAG;
+                        closelen=0;
+                    } else {
+                        state=INTAG;
+                        fputc(' ',out);
+                    }
+                }
+                break;
+
+            case INTAG:
+                if ( ch == '>' ) {
+                    state=INTXT;
+                    fputc(' ',out);
+                }
+                break;
+
+            case INDROPTAG:
+                if ( ch == '<' ) {
+                    state=INDROPINTAG;
+                    closelen=0;
+                }
+                break;
+
+            case INDROPINTAG:
+                state = ( ch == '/' ) ? INDROPINCLSTAG : INDROPTAG;
+                break;
+
+            case INDROPINCLSTAG:
+                /*
+                 * NOTE(review): the closing tag is accepted as soon as its
+                 * first lenbuf letters match the stored name, so "</abbr>"
+                 * also ends a dropped "<a>" - confirm this is intended.
+                 */
+                if ( isalpha(ch) && closelen < lenbuf && tolower(ch) == buf[closelen] ) {
+                    closelen++;
+                    if ( closelen==lenbuf )
+                        state=INTAG;
+                } else {
+                    state=INDROPTAG;
+                }
+                break;
+
+            case FINDEND:
+                if ( ch == ' ' ) {
+                    buf[ lenbuf ] = (char)ch;
+                    lenbuf++;
+                    if ( lenbuf > spacelen ) {
+                        /* Enough indentation: keep the line break and the spaces. */
+                        fwrite(buf, sizeof(char), lenbuf, out);
+                        state=INTXT;
+                    }
+                } else if ( ch=='\n' ) {
+                    buf[ lenbuf ] = (char)ch;
+                    lenbuf++;
+                    fwrite(buf, sizeof(char), lenbuf, out);
+                    state=INTXT;
+                } else if ( ch !='\r' ) {
+                    /* Line continues: join it with a space and reprocess ch as text. */
+                    state=INTXT;
+                    fputc(' ',out);
+                    ungetc(ch,in);
+                }
+                break;
+
+            default:
+                printf("Unknown state: %d\n", state);
+                exit(1);
+        }
+    }
+
+    /*
+     * NOTE(review): if the input ends while state is FINDEND, the buffered
+     * newline (and spaces) is never written - confirm this is intended.
+     */
+
+    if ( in!=stdin )
+        fclose(in);
+    if ( out!=stdout )
+        fclose(out);
+
+    return 0;
+}
+