add .gitignore
[tedtools.git] / regis.c
1 /*
2  * Copyright (c) 2004 Teodor Sigaev <teodor@sigaev.ru>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *        notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *        notice, this list of conditions and the following disclaimer in the
12  *        documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *        may be used to endorse or promote products derived from this software
15  *        without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include <string.h>
31 #include <ctype.h>
32          
33 #include "tlog.h"
34 #include "tmalloc.h"
35 #include "regis.h"
36
37
/*
 * Tell whether str is made up only of characters that can occur in a
 * regis pattern: letters (as judged by isalpha), '[', ']' and '^'.
 *
 * Returns 1 when every character qualifies, which includes an empty or
 * NULL string, and 0 as soon as any other character is met.  Only the
 * character set is checked; bracket pairing is not validated here.
 */
int
RS_isRegis(const char *str) {
	const unsigned char *cur;

	for (cur = (const unsigned char *)str; cur && *cur; cur++) {
		int allowed = isalpha(*cur) || *cur=='[' || *cur==']' || *cur=='^';

		if ( !allowed )
			return 0;
	}

	return 1;
}
49
/*
 * States of the pattern parser in RS_compile.
 */
/* just after '[': expects '^' or the first letter of the class */
#define RS_IN_ONEOF     1
/* inside a "[...]" class after its first letter: expects letters or ']' */
#define RS_IN_ONEOF_IN  2
/* inside a "[^...]" class after the '^': expects letters or ']' */
#define RS_IN_NONEOF    3
/* outside any brackets (initial state): expects a letter or '[' */
#define RS_IN_WAIT      4
54
55 static RegisNode*
56 newRegisNode(RegisNode *prev, int len) {
57         RegisNode       *ptr;
58         ptr = (RegisNode*)t0malloc(RNHDRSZ+len+1);
59         if (prev)
60                 prev->next=ptr;
61         return ptr;
62 }
63
64 int
65 RS_compile(Regis *r, int issuffix, const char *str) {
66         int i,len = strlen(str);
67         int state = RS_IN_WAIT;
68         RegisNode       *ptr=NULL;
69
70         memset(r,0,sizeof(Regis));
71         r->issuffix = (issuffix) ? 1 : 0;
72
73         for(i=0;i<len;i++) {
74                 unsigned char c = *( ( (unsigned char*)str ) + i );
75                 if ( state == RS_IN_WAIT ) {
76                         if ( isalpha(c) ) {
77                                 if ( ptr ) 
78                                         ptr = newRegisNode(ptr,len);
79                                 else
80                                         ptr = r->node = newRegisNode(NULL,len);
81                                 ptr->data[ 0 ] = c;
82                                 ptr->type = RSF_ONEOF;
83                                 ptr->len=1;
84                         } else if ( c=='[' )  {
85                                 if ( ptr ) 
86                                         ptr = newRegisNode(ptr,len);
87                                 else
88                                         ptr = r->node = newRegisNode(NULL,len);
89                                 ptr->type = RSF_ONEOF;
90                                 state=RS_IN_ONEOF;
91                         } else 
92                                 tlog(TL_ALARM|TL_EXIT,"Error in regis: %s at pos %d\n", str, i+1);
93                 } else if ( state == RS_IN_ONEOF ) {
94                         if ( c=='^' ) {
95                                 ptr->type = RSF_NONEOF;
96                                 state=RS_IN_NONEOF;
97                         } else if ( isalpha(c) ) {
98                                 ptr->data[ 0 ] = c;
99                                 ptr->len=1;
100                                 state=RS_IN_ONEOF_IN;
101                         } else
102                                 tlog(TL_ALARM|TL_EXIT,"Error in regis: %s at pos %d\n", str,  i+1);
103                 } else if ( state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF ) {
104                         if ( isalpha(c) ) {
105                                 ptr->data[ ptr->len ] = c;
106                                 ptr->len++;
107                         } else if ( c==']' ) {
108                                 state=RS_IN_WAIT;
109                         } else
110                                 tlog(TL_ALARM|TL_EXIT,"Error in regis: %s at pos %d\n", str,  i+1);
111                 } else
112                         tlog(TL_CRIT|TL_EXIT,"Internal error in RS_compile: %d\n", state);
113         }
114
115         ptr = r->node;
116         while(ptr) {
117                 r->nchar++;
118                 ptr=ptr->next;
119         }
120
121         return 0;
122 }
123
124 void 
125 RS_free(Regis *r) {
126         RegisNode *ptr=r->node,*tmp;
127
128         while(ptr) {
129                 tmp=ptr->next;
130                 tfree(ptr);
131                 ptr = tmp;
132         }
133
134         r->node = NULL;
135 }
136
137 int 
138 RS_execute(Regis *r, const char *str, int len) {
139         RegisNode *ptr=r->node;
140         unsigned char *c;
141
142         if (len<0)
143                 len=strlen(str);
144
145         if (len<r->nchar)
146                 return 0;
147
148         if ( r->issuffix ) 
149                 c = ((unsigned char*)str) + len - r->nchar;
150         else
151                 c = (unsigned char*)str; 
152
153         while(ptr) {
154                 switch(ptr->type) {
155                         case RSF_ONEOF:
156                                 if ( ptr->len==0 ) {
157                                         if ( *c != *(ptr->data) )
158                                                 return 0;
159                                 } else if ( strchr((char*)ptr->data, *c) == NULL )
160                                         return 0;
161                                 break;
162                         case RSF_NONEOF:
163                                 if ( ptr->len==0 ) {
164                                         if ( *c == *(ptr->data) )
165                                                 return 0;
166                                 } else if ( strchr((char*)ptr->data, *c) != NULL )
167                                         return 0;
168                                 break;
169                         default:
170                                 tlog(TL_CRIT|TL_EXIT,"RS_execute: Unknown type node: %d\n", ptr->type);
171                 }
172                 ptr=ptr->next;
173                 c++;
174         }
175
176         return 1;
177 }