From: teodor <teodor>
Date: Fri, 4 Apr 2008 16:02:14 +0000 (+0000)
Subject: Initial revision
X-Git-Tag: start~1
X-Git-Url: http://sigaev.ru/git/gitweb.cgi?p=wildspeed.git;a=commitdiff_plain;h=ceecc7a42902dd4f7073c15d825df252817ba154

Initial revision
---

ceecc7a42902dd4f7073c15d825df252817ba154
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8d36bf4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+PG_CPPFLAGS = -DOPTIMIZE_WILDCARD_QUERY
+MODULE_big = wildspeed
+OBJS = wildspeed.o 
+
+DATA_built = wildspeed.sql
+DATA = uninstall_wildspeed.sql
+REGRESS = wildspeed
+
+
+subdir = contrib/wildspeed
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+include $(top_srcdir)/contrib/contrib-global.mk
diff --git a/README.wildspeed b/README.wildspeed
new file mode 100644
index 0000000..1bee021
--- /dev/null
+++ b/README.wildspeed
@@ -0,0 +1,158 @@
+WildSpeed - fast wildcard search for LIKE operator
+
+   Wildspeed extension provides GIN index support for wildcard search
+   for LIKE operator.
+
+   Online version of this document is available
+   http://www.sai.msu.su/~megera/wiki/wildspeed
+
+Authors
+
+     * Oleg Bartunov <oleg@sai.msu.su>, Moscow, Moscow University, Russia
+     * Teodor Sigaev <teodor@sigaev.ru>, Moscow, Moscow University,Russia
+
+License
+
+   Stable version, included into PostgreSQL distribution, released under
+   BSD license. Development version, available from this site, released
+   under the GNU General Public License, version 2 (June 1991)
+
+Downloads
+
+   Stable version of wildspeed is available from
+   http://www.sigaev.ru/cvsweb/cvsweb.cgi/wildspeed/
+
+Installation
+
+% cd PGSQLSRC/contrib
+% tar xzvf wildspeed.tar.gz
+% make
+% make install
+% make installcheck
+% psql DB < wildspeed.sql
+
+   Wildspeed provides opclass (wildcard_ops) and uses partial match
+   feature of GIN, available since 8.4. Also, it supports full index scan.
+
+   The size of index can be very big, since it contains entries for all
+   permutations of the original word, see [1] for details. For example,
+   word hello will be indexed as well as its all permutations:
+=# select permute('hello');
+               permute
+--------------------------------------
+ {hello$,ello$h,llo$he,lo$hel,o$hell}
+
+   Notice, symbol '$' is used only for visualization, in actual
+   implementation null-symbol '\0' is used.
+
+   Search query rewritten as prefix search:
+*X  -> X$*
+X*Y -> Y$X*
+*X* -> X*
+
+   For example, search for 'hel*o' will be rewritten as 'o$hel'.
+
+   Special function permute(TEXT), which returns all permutations of
+   argument, provided for test purposes.
+
+   Performance of wildspeed depends on search pattern. Basically,
+   wildspeed is less effective than btree index with text_pattern_ops for
+   prefix search (the difference is greatly reduced for long prefixes) and
+   much faster for wildcard search.
+
+   Wildspeed by default uses optimization (skip short patterns if there
+   are long one), which can be turned off in Makefile by removing define
+   -DOPTIMIZE_WILDCARD_QUERY.
+
+References
+
+   1.  http://www.cs.wright.edu/~tkprasad/courses/cs499/L05TolerantIR.ppt, see also,
+   http://nlp.stanford.edu/IR-book/html/htmledition/permuterm-indexes-1.html
+
+Examples
+
+   Table words contains 747358 records, w1 and w2 columns contains the
+   same data in order to test performance of Btree (w1) and GIN (w2)
+   indexes:
+   Table "public.words"
+ Column | Type | Modifiers
+--------+------+-----------
+ w1     | text |
+ w2     | text |
+
+words=# create index bt_idx on words using btree (w1 text_pattern_ops);
+CREATE INDEX
+Time: 1885.195 ms
+words=# create index gin_idx on words using gin (w2 wildcard_ops);
+vacuum analyze;
+CREATE INDEX
+Time: 530351.223 ms
+
+Size:
+
+words=# select pg_relation_size('words');
+ pg_relation_size
+------------------
+         43253760
+
+words=# select pg_relation_size('gin_idx');
+ pg_relation_size
+------------------
+        417816576
+(1 row)
+
+words=# select pg_relation_size('bt_idx');
+ pg_relation_size
+------------------
+         23437312
+(1 row)
+
+   Prefix search:
+words=# select count(*) from words where w1 like 'a%';
+ count
+-------
+ 15491
+(1 row)
+
+Time: 7.502 ms
+words=# select count(*) from words where w2 like 'a%';
+ count
+-------
+ 15491
+(1 row)
+
+Time: 31.152 ms
+
+   Wildcard search:
+words=# select count(*) from words where w1 like '%asd%';
+ count
+-------
+    26
+(1 row)
+
+Time: 147.308 ms
+words=# select count(*) from words where w2 like '%asd%';
+ count
+-------
+    26
+(1 row)
+
+Time: 0.339 ms
+
+   Full index scan:
+words=# set enable_seqscan to off;
+words=# explain analyze select count(*) from words where w2 like '%';
+                                                                QUERY PLAN
+
+--------------------------------------------------------------------------------------------------------------
+-----------------------------
+ Aggregate  (cost=226274.98..226274.99 rows=1 width=0) (actual time=2218.709..2218.709 rows=1 loops=1)
+   ->  Bitmap Heap Scan on words  (cost=209785.73..224406.77 rows=747283 width=0) (actual time=1510.516..1913.
+430 rows=747358 loops=1)
+         Filter: (w2 ~~ '%'::text)
+         ->  Bitmap Index Scan on gin_idx  (cost=0.00..209598.91 rows=747283 width=0) (actual time=1509.358..1
+509.358 rows=747358 loops=1)
+               Index Cond: (w2 ~~ '%'::text)
+ Total runtime: 2218.747 ms
+(6 rows)
+
diff --git a/data/testlike.data b/data/testlike.data
new file mode 100644
index 0000000..df8861e
--- /dev/null
+++ b/data/testlike.data
@@ -0,0 +1,9 @@
+hello
+hella
+hillo
+helko
+hekko
+jello
+jelko
+jella
+jeeeh
diff --git a/expected/wildspeed.out b/expected/wildspeed.out
new file mode 100644
index 0000000..b5a85fc
--- /dev/null
+++ b/expected/wildspeed.out
@@ -0,0 +1,346 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of wildspeed.sql.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+SELECT permute('');
+ permute 
+---------
+ {$}
+(1 row)
+
+SELECT permute('0');
+ permute 
+---------
+ {0$}
+(1 row)
+
+SELECT permute('01');
+  permute  
+-----------
+ {01$,1$0}
+(1 row)
+
+SELECT permute('hello');
+               permute                
+--------------------------------------
+ {hello$,ello$h,llo$he,lo$hel,o$hell}
+(1 row)
+
+CREATE TABLE testlike ( t text );
+\copy testlike from 'data/testlike.data'
+INSERT INTO testlike VALUES ('');
+SELECT * FROM testlike WHERE t LIKE '' ORDER BY t;
+ t 
+---
+ 
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%' ORDER BY t;
+   t   
+-------
+ 
+ hekko
+ helko
+ hella
+ hello
+ hillo
+ jeeeh
+ jelko
+ jella
+ jello
+(10 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'hello' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'hello%' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'hel%' ORDER BY t;
+   t   
+-------
+ helko
+ hella
+ hello
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%hello' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%lo' ORDER BY t;
+   t   
+-------
+ hello
+ hillo
+ jello
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%hello%' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%ll%' ORDER BY t;
+   t   
+-------
+ hella
+ hello
+ hillo
+ jella
+ jello
+(5 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%o' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ hillo
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+ hillo
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+ hillo
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%e%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+(2 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%e%l%' ORDER BY t;
+   t   
+-------
+ helko
+ hella
+ hello
+ jelko
+ jella
+ jello
+(6 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%e%o' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ jelko
+ jello
+(5 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%o%' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ hillo
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'j%e%' ORDER BY t;
+   t   
+-------
+ jeeeh
+ jelko
+ jella
+ jello
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'j%k%' ORDER BY t;
+   t   
+-------
+ jelko
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'h%k%' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+(2 rows)
+
+CREATE INDEX like_idx ON testlike USING gin ( t wildcard_ops );
+SET enable_seqscan=off;
+SELECT * FROM testlike WHERE t LIKE '' ORDER BY t;
+ t 
+---
+ 
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%' ORDER BY t;
+   t   
+-------
+ 
+ hekko
+ helko
+ hella
+ hello
+ hillo
+ jeeeh
+ jelko
+ jella
+ jello
+(10 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'hello' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'hello%' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'hel%' ORDER BY t;
+   t   
+-------
+ helko
+ hella
+ hello
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%hello' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%lo' ORDER BY t;
+   t   
+-------
+ hello
+ hillo
+ jello
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%hello%' ORDER BY t;
+   t   
+-------
+ hello
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE '%ll%' ORDER BY t;
+   t   
+-------
+ hella
+ hello
+ hillo
+ jella
+ jello
+(5 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%o' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ hillo
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+ hillo
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+ hillo
+(3 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%e%l%o' ORDER BY t;
+   t   
+-------
+ helko
+ hello
+(2 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%e%l%' ORDER BY t;
+   t   
+-------
+ helko
+ hella
+ hello
+ jelko
+ jella
+ jello
+(6 rows)
+
+SELECT * FROM testlike WHERE t LIKE '%e%o' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ jelko
+ jello
+(5 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'h%o%' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+ hello
+ hillo
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'j%e%' ORDER BY t;
+   t   
+-------
+ jeeeh
+ jelko
+ jella
+ jello
+(4 rows)
+
+SELECT * FROM testlike WHERE t LIKE 'j%k%' ORDER BY t;
+   t   
+-------
+ jelko
+(1 row)
+
+SELECT * FROM testlike WHERE t LIKE 'h%k%' ORDER BY t;
+   t   
+-------
+ hekko
+ helko
+(2 rows)
+
diff --git a/sql/wildspeed.sql b/sql/wildspeed.sql
new file mode 100644
index 0000000..d536e69
--- /dev/null
+++ b/sql/wildspeed.sql
@@ -0,0 +1,67 @@
+--
+-- first, define the datatype.  Turn off echoing so that expected file
+-- does not depend on contents of wildspeed.sql.
+--
+
+SET client_min_messages = warning;
+\set ECHO none
+\i wildspeed.sql
+\set ECHO all
+RESET client_min_messages;
+
+
+SELECT permute('');
+SELECT permute('0');
+SELECT permute('01');
+SELECT permute('hello');
+
+CREATE TABLE testlike ( t text );
+\copy testlike from 'data/testlike.data'
+INSERT INTO testlike VALUES ('');
+
+SELECT * FROM testlike WHERE t LIKE '' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hello' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hello%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hel%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%hello' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%lo' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%hello%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%ll%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%e%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%e%l%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%e%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%o%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'j%e%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'j%k%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%k%' ORDER BY t;
+
+CREATE INDEX like_idx ON testlike USING gin ( t wildcard_ops );
+
+SET enable_seqscan=off;
+
+SELECT * FROM testlike WHERE t LIKE '' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hello' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hello%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'hel%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%hello' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%lo' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%hello%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%ll%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%e%l%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%e%l%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE '%e%o' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%o%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'j%e%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'j%k%' ORDER BY t;
+SELECT * FROM testlike WHERE t LIKE 'h%k%' ORDER BY t;
+
+
+
diff --git a/uninstall_wildspeed.sql b/uninstall_wildspeed.sql
new file mode 100644
index 0000000..978c142
--- /dev/null
+++ b/uninstall_wildspeed.sql
@@ -0,0 +1,10 @@
+BEGIN;
+
+DROP FUNCTION IF EXISTS permute(text) CASCADE;
+DROP OPERATOR CLASS IF EXISTS wildcard_ops USING gin CASCADE;
+DROP FUNCTION IF EXISTS gin_extract_permuted(text, internal)  CASCADE;
+DROP FUNCTION IF EXISTS wildcmp(text, text, bool) CASCADE;
+DROP FUNCTION IF EXISTS gin_extract_wildcard(text, internal, int2, internal) CASCADE;
+DROP FUNCTION IF EXISTS gin_consistent_wildcard(internal, int2, text) CASCADE;
+
+END;
diff --git a/wildspeed.c b/wildspeed.c
new file mode 100644
index 0000000..389dff8
--- /dev/null
+++ b/wildspeed.c
@@ -0,0 +1,651 @@
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "utils/array.h"
+
+/*
+ * MARK_SIGN is a sign of end of string and this character should
+ * be regular character. The best candidate of this is a zero byte
+ * which is accepted for any locale used by postgres. But it's impossible
+ * to show it, so we will replace it to another one (MARK_SIGN_SHOW) which 
+ * can be noticed well. But we can't use it as mark because it's allowed
+ * to be inside string.
+ */ 
+
+#define		MARK_SIGN		'\0'
+#define		MARK_SIGN_SHOW	'$'
+
+
+#define WC_BEGIN		0x01   /* should be in begining of string */
+#define WC_MIDDLE		0x02   /* should be in middle of string */
+#define WC_END			0x04   /* should be in end of string */
+
+PG_MODULE_MAGIC;
+
+static text*
+appendStrToText( text *src, char *str, int32 len, int32 maxlen )
+{
+	int32	curlen;
+
+	if (src == NULL )
+	{
+		Assert( maxlen >= 0 );
+		src = (text*)palloc( VARHDRSZ + sizeof(char*) * maxlen );
+		SET_VARSIZE(src, 0 + VARHDRSZ);
+	}
+
+	curlen = VARSIZE(src) - VARHDRSZ;
+
+	if (len>0)
+		memcpy( VARDATA(src) + curlen, str, len );
+
+	SET_VARSIZE(src, curlen + len + VARHDRSZ);
+
+	return src;
+}
+
+static text*
+appendMarkToText( text *src, int32 maxlen )
+{
+	char sign = MARK_SIGN;
+
+	return appendStrToText( src, &sign, 1, maxlen );
+}
+
+static text*
+setFlagOfText( char flag, int32 maxlen )
+{
+	char flagstruct[2];
+
+	Assert( maxlen > 0 );
+	/*
+	 * Mark text by setting first byte to MARK_SIGN to indicate
+	 * that text has flags. It's a safe for non empty string, 
+	 * because first character can not be a MARK_SIGN (see
+	 * gin_extract_permuted() )
+	 */
+
+	flagstruct[0] = MARK_SIGN;
+	flagstruct[1] = flag;
+	
+	return appendStrToText(NULL, flagstruct, 2, maxlen );
+}
+
+PG_FUNCTION_INFO_V1(gin_extract_permuted);
+Datum		gin_extract_permuted(PG_FUNCTION_ARGS);
+Datum
+gin_extract_permuted(PG_FUNCTION_ARGS)
+{
+	text	*src = PG_GETARG_TEXT_P(0);
+	int32	*nentries = (int32 *) PG_GETARG_POINTER(1);
+	Datum   *entries = NULL;
+	int32	srclen = pg_mbstrlen_with_len(VARDATA(src), VARSIZE(src) - VARHDRSZ);
+
+	*nentries = srclen;
+
+	if ( srclen == 0 )
+	{
+		/*
+		 * Empty string is encoded by alone MARK_SIGN character
+		 */
+		*nentries = 1;
+		entries = (Datum*) palloc(sizeof(Datum));
+		entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
+	}
+	else
+	{
+		text 	*dst;
+		int32 	i, 
+				offset=0; /* offset to current position in src in bytes */ 
+		int32	nbytes = VARSIZE(src) - VARHDRSZ;
+		char	*srcptr = VARDATA(src);
+
+		/*
+		 * Permutation: hello will be permuted to hello$, ello$h, llo$he, lo$hel, o$hell.
+		 * So, number of entries is equial to number of characters (not a bytes)
+		 */
+
+		entries = (Datum*)palloc(sizeof(char*) * nbytes );
+		for(i=0; i<srclen;i++) {
+		
+			/*
+			 * Copy first part. For llo$he it will be 'llo'
+			 */
+			dst = appendStrToText( NULL, srcptr + offset, nbytes - offset, nbytes + 1 ); 
+
+			/*
+			 * Set mark sign ($)
+			 */
+			dst = appendMarkToText( dst, -1 );
+
+			/*
+			 * Copy rest of string (in example above 'he')
+			 */
+			dst = appendStrToText( dst, srcptr, offset, -1 );
+
+			entries[i] = PointerGetDatum(dst);
+			offset += pg_mblen( srcptr + offset );
+		}
+	}
+
+	PG_FREE_IF_COPY(src,0);
+	PG_RETURN_POINTER(entries);
+}
+
+PG_FUNCTION_INFO_V1(wildcmp);
+Datum       wildcmp(PG_FUNCTION_ARGS);
+Datum
+wildcmp(PG_FUNCTION_ARGS)
+{
+	text	*a = PG_GETARG_TEXT_P(0);
+	text	*b = PG_GETARG_TEXT_P(1);
+	bool	partialMatch = PG_GETARG_BOOL(2);
+	int32	cmp;
+	int		lena,
+			lenb;
+	char 	*ptra = VARDATA(a),
+			*ptrb = VARDATA(b);
+	char	flag = 0;
+
+	lena = VARSIZE(a) - VARHDRSZ;
+	lenb = VARSIZE(b) - VARHDRSZ;
+
+	/*
+	 * sets correct pointers and lengths in case of flags
+	 * presence
+	 */
+	if ( lena > 2 && *ptra == MARK_SIGN )
+	{
+		flag = *(ptra+1);
+		ptra+=2;
+		lena-=2;
+
+		if ( lenb > 2 && *ptrb == MARK_SIGN )
+		{
+			/*
+			 * If they have different flags then they can not be equal, this 
+			 * place works only during check of equality of keys
+			 * to search
+			 */
+			if ( flag != *(ptrb+1) )
+				return 1;
+			ptrb+=2;
+			lenb-=2;
+
+			/* b can not be a product of gin_extract_wildcard for partial match mode */
+			Assert( partialMatch == false );
+		}
+	} 
+	else  if ( lenb > 2 && *ptrb == MARK_SIGN )
+	{
+		/* b can not be a product of gin_extract_wildcard for partial match mode */
+		Assert( partialMatch == false );
+
+		ptrb+=2;
+		lenb-=2;
+	}
+
+	if ( lena == 0 )
+	{
+		if ( partialMatch )
+			cmp = 0; /* full scan for partialMatch*/
+		else
+			cmp = (lenb>0) ? -1 : 0;
+	}
+	else
+	{
+		/*
+		 * We couldn't use strcmp because of MARK_SIGN
+		 */
+		cmp = memcmp(ptra, ptrb, Min(lena, lenb));
+
+		if ( partialMatch )
+		{
+			if ( cmp == 0 )
+			{
+				if ( lena > lenb )
+				{
+					/*
+					 * b argument is not beginning with argument a
+					 */
+					cmp = 1;
+				}
+				else if ( flag > 0 && lenb>lena /* be safe */ )
+				{ /* there is some flags to check */
+					char	actualFlag;
+
+					if ( ptrb[ lenb - 1 ] == MARK_SIGN )
+						actualFlag = WC_BEGIN;  
+					else if ( ptrb[ lena ] == MARK_SIGN )
+						actualFlag = WC_END;
+					else
+						actualFlag = WC_MIDDLE;
+
+					if ( (flag & actualFlag) == 0 )
+					{
+						/* 
+						 * Prefix are matched but this prefix s not placed as needed.
+						 * so we should give a smoke signal to GIN that we don't want
+						 * this match, but wish to continue scan 
+						 */
+						cmp = -1;
+					}
+				}
+			} 
+			else if (cmp < 0)
+			{
+				cmp = 1; /* prevent continue scan */
+			}
+		} 
+		else if ( (cmp == 0) && (lena != lenb) )
+		{
+			cmp = (lena < lenb) ? -1 : 1;
+		}
+	}
+
+	PG_FREE_IF_COPY(a,0);
+	PG_FREE_IF_COPY(b,1);
+	PG_RETURN_INT32( cmp );	
+}
+
+#ifdef OPTIMIZE_WILDCARD_QUERY
+
+typedef struct 
+{
+	Datum	entry;
+	int32	len;
+	char	flag;
+} OptItem;
+
+
+/*
+ * Function drops most short search word to speedup 
+ * index search by preventing use word which gives
+ * a lot of matches
+ */
+static void 
+optimize_wildcard_search( Datum *entries, int32 *nentries )
+{
+	int32	maxlen=0;
+	OptItem	*items;
+	int		i, nitems = *nentries;
+	char 	*ptr,*p;
+
+	items = (OptItem*)palloc( sizeof(OptItem) * (*nentries) );
+	for(i=0;i<nitems;i++)
+	{
+		items[i].entry = entries[i];
+		items[i].len = VARSIZE(entries[i]) - VARHDRSZ;
+		ptr = VARDATA(entries[i]);
+
+		if ( items[i].len > 2 && *ptr == MARK_SIGN )
+		{
+			items[i].len-=2;
+			items[i].flag = *(ptr+1);
+		}
+		else
+		{
+			items[i].flag = 0;
+			if ( items[i].len > 1 && (p=strchr(ptr, MARK_SIGN)) != NULL )
+			{
+				if ( p == ptr + items[i].len -1 )
+					items[i].flag = WC_BEGIN;
+				else 
+					items[i].flag = WC_BEGIN | WC_END;
+			}
+		}
+
+		if ( items[i].len > maxlen )
+			maxlen = items[i].len;
+	}
+	
+	*nentries=0;
+
+	for(i=0;i<nitems;i++)
+	{
+		if ( (items[i].flag & WC_BEGIN) && (items[i].flag & WC_END) )
+		{	/* X$Y use always */
+			entries[ *nentries ] = items[i].entry;
+			(*nentries)++;
+		}
+		else if ( (items[i].flag & WC_MIDDLE) == 0 )
+		{ 
+			/* 
+			 * for begin-only or end-only word we set more low limit than for 
+			 * other variants
+			 */
+			if ( 3*items[i].len > maxlen )
+			{
+				entries[ *nentries ] = items[i].entry;
+				(*nentries)++;
+			}
+		}
+		else if ( 2*items[i].len > maxlen )
+		{	
+			/* 
+			 * use only items with biggest length 
+			 */
+			entries[ *nentries ] = items[i].entry;
+			(*nentries)++;
+		}
+	}
+
+	Assert( *nentries>0 );
+
+}
+#endif
+
+typedef struct 
+{
+	bool	iswildcard;
+	int32	len;
+	char	*ptr;
+} WildItem;
+
+PG_FUNCTION_INFO_V1(gin_extract_wildcard);
+Datum		gin_extract_wildcard(PG_FUNCTION_ARGS);
+Datum
+gin_extract_wildcard(PG_FUNCTION_ARGS)
+{
+	text			*q = PG_GETARG_TEXT_P(0);
+	int32			lenq = VARSIZE(q) - VARHDRSZ;
+	int32			*nentries = (int32 *) PG_GETARG_POINTER(1);
+#ifdef NOT_USED
+	StrategyNumber 	strategy = PG_GETARG_UINT16(2);
+#endif
+	bool			*partialmatch, 
+					**ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
+	Datum      		*entries = NULL;
+	char			*qptr = VARDATA(q);
+	int				clen,
+					splitqlen = 0,
+					i;
+	WildItem		*items;
+	text			*entry;
+
+	*nentries = 0;
+
+	if ( lenq == 0 )
+	{
+		partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool));
+		*nentries = 1;
+		entries = (Datum*) palloc(sizeof(Datum));
+		entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
+
+		PG_RETURN_POINTER(entries);
+	}
+
+	partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool) * lenq);
+	entries = (Datum*) palloc(sizeof(Datum) * lenq);
+	items=(WildItem*) palloc0( sizeof(WildItem) * lenq );
+
+
+	/*
+	 * Parse expression to the list of constant parts and
+	 * wildcards
+	 */
+	while( qptr - VARDATA(q) < lenq )
+	{
+		clen = pg_mblen(qptr);
+
+		if ( clen==1 && (*qptr == '_' || *qptr == '%' ) )
+		{
+			if ( splitqlen == 0 )
+			{
+				items[ splitqlen ].iswildcard = true;
+				splitqlen++;
+			} 
+			else if ( items[ splitqlen-1 ].iswildcard == false )
+			{
+				items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
+				items[ splitqlen ].iswildcard = true;
+				splitqlen++;
+			}
+			/*
+			 * ignore wildcard, because we don't make difference beetween
+			 * %, _ or a combination of its
+			 */
+		}
+		else
+		{
+			if ( splitqlen == 0 || items[ splitqlen-1 ].iswildcard == true )
+			{
+				items[ splitqlen ].ptr = qptr;
+				splitqlen++;
+			}
+		}
+		qptr += clen;
+	}
+
+	Assert( splitqlen >= 1 );
+	if ( items[ splitqlen-1 ].iswildcard == false )
+		items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
+
+	if ( items[ 0 ].iswildcard == false )
+	{
+		/* X... */
+		if ( splitqlen == 1 )
+		{
+			/*   X => X$, exact match */
+			*nentries = 1;
+			entry = appendStrToText(NULL, items[ 0 ].ptr, items[ 0 ].len, lenq+1);
+			entry = appendMarkToText( entry, -1 );
+			entries[0] = PointerGetDatum( entry );
+		} 
+		else if ( items[ splitqlen-1 ].iswildcard == false ) 
+		{
+			/*   X * [X1 * [] ] ] Y => Y$X* [ + X1* [] ] */
+
+			*nentries = 1;
+			entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
+			entry = appendMarkToText( entry, -1 );
+			entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
+			partialmatch[0] = true;
+			entries[0] = PointerGetDatum( entry );
+
+			for(i=1; i<splitqlen-1; i++)
+			{
+				if ( items[ i ].iswildcard )
+					continue;
+				entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ ); 
+				entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1 );
+				partialmatch[ *nentries ] = true;
+				entries[ *nentries ] =  PointerGetDatum( entry );
+				(*nentries)++;
+			}
+		}
+		else
+		{
+			/*   X * [ X1 * [] ]  => X*$ [ + X1* [] ] */
+		
+			entry = setFlagOfText( WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+			entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
+			*nentries = 1;
+			partialmatch[ 0 ] = true;
+			entries[0] = PointerGetDatum( entry );
+
+			for(i=2; i<splitqlen-1; i++)
+			{
+				if ( items[ i ].iswildcard )
+					continue;
+				entry = setFlagOfText( (i==splitqlen-2) ? (WC_MIDDLE | WC_END) : WC_MIDDLE, 
+										lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+				entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
+				partialmatch[ *nentries ] = true;
+				entries[ *nentries ] =  PointerGetDatum( entry );
+				(*nentries)++;
+			}
+		}
+	} 
+	else
+	{
+		/* *...  */
+
+		if ( splitqlen == 1 )
+		{
+			/* any word => full scan */
+			*nentries = 1;
+			entry = appendStrToText(NULL, "", 0, lenq+1);
+			partialmatch[0] = true;
+			entries[0] = PointerGetDatum( entry );
+		}
+		else if ( items[ splitqlen-1 ].iswildcard == false )
+		{
+			/*     * [ X1 * [] ] X  => X$* [ + X1* [] ]  */
+			*nentries = 1;
+			entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
+			entry = appendMarkToText( entry, -1 );
+			partialmatch[0] = true;
+			entries[0] = PointerGetDatum( entry );
+
+			for(i=1; i<splitqlen-1; i++)
+			{
+				if ( items[ i ].iswildcard )
+					continue;
+				entry = setFlagOfText( (i==1) ? (WC_MIDDLE | WC_BEGIN) : WC_MIDDLE, 
+										lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+				entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
+				partialmatch[ *nentries ] = true;
+				entries[ *nentries ] =  PointerGetDatum( entry );
+				(*nentries)++;
+			}
+		}
+		else
+		{
+			/* * X [ * X1 [] ] * => X* [ + X1* [] ] */
+			for(i=1; i<splitqlen-1; i++)
+			{
+				if ( items[ i ].iswildcard )
+					continue;
+
+				if ( splitqlen > 3 )
+				{
+					if ( i==1 )
+						entry = setFlagOfText( WC_MIDDLE | WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+					else if ( i == splitqlen-2 )
+						entry = setFlagOfText( WC_MIDDLE | WC_END, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+					else
+						entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ ); 
+				}
+				else
+					entry = NULL;
+				entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, lenq+1);
+				partialmatch[ *nentries ] = true;
+				entries[ *nentries ] =  PointerGetDatum( entry );
+				(*nentries)++;
+			}
+		}
+	}
+
+	PG_FREE_IF_COPY(q,0);
+
+#ifdef OPTIMIZE_WILDCARD_QUERY
+	if ( *nentries > 1 )
+		optimize_wildcard_search( entries, nentries );
+#endif
+
+	PG_RETURN_POINTER(entries);
+}
+
+
+PG_FUNCTION_INFO_V1(gin_consistent_wildcard);
+Datum       gin_consistent_wildcard(PG_FUNCTION_ARGS);
+Datum
+gin_consistent_wildcard(PG_FUNCTION_ARGS)
+{
+	bool       	*check = (bool *) PG_GETARG_POINTER(0);
+	bool        res = true;
+	int         i;
+	int32 		nentries;
+
+	if ( fcinfo->flinfo->fn_extra == NULL )
+	{
+		bool	*pmatch;
+
+		/*
+		 * we need to get nentries, we'll get it by regular way
+		 * and store it in function context
+		 */
+
+		fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+														sizeof(int32));
+
+		DirectFunctionCall4(
+					gin_extract_wildcard,
+					PG_GETARG_DATUM(2),  /* query */
+					PointerGetDatum( fcinfo->flinfo->fn_extra ), /* &nentries */
+					PG_GETARG_DATUM(1),  /* strategy */
+					PointerGetDatum( &pmatch )
+		);
+	}
+
+	nentries = *(int32*) fcinfo->flinfo->fn_extra;
+
+	for (i = 0; res && i < nentries; i++)
+		if (check[i] == false)
+			res = false;
+
+	PG_RETURN_BOOL(res);
+}
+
+/*
+ * Mostly debug fuction
+ */
+PG_FUNCTION_INFO_V1(permute);
+Datum       permute(PG_FUNCTION_ARGS);
+Datum
+permute(PG_FUNCTION_ARGS)
+{
+	Datum		src = PG_GETARG_DATUM(0);
+	int32		nentries = 0;
+	Datum 		*entries;
+	ArrayType	*res;
+	int 		i;
+
+	/*
+	 * Get permuted values by gin_extract_permuted()
+	 */
+	entries = (Datum*) DatumGetPointer(DirectFunctionCall2(
+					gin_extract_permuted, src, PointerGetDatum(&nentries)
+			));
+
+	/*
+	 * We need to replace MARK_SIGN to MARK_SIGN_SHOW.
+	 * See comments above near definition of MARK_SIGN and MARK_SIGN_SHOW.
+	 */
+	if ( nentries == 1 && VARSIZE(entries[0]) == VARHDRSZ + 1)
+	{
+		*(VARDATA(entries[0])) = MARK_SIGN_SHOW;		
+	}
+	else
+	{
+		int32  	offset = 0; /* offset of MARK_SIGN */
+		char	*ptr;
+
+		/*
+		 * We scan array from the end because it allows simple calculation
+		 * of MARK_SIGN position: on every iteration it's moved one 
+		 * character to the end.
+		 */
+		for(i=nentries-1;i>=0;i--) 
+		{
+			ptr = VARDATA(entries[i]);
+
+			offset += pg_mblen(ptr);
+			Assert( *(ptr + offset) == MARK_SIGN );
+			*(ptr + offset) = MARK_SIGN_SHOW;
+		}
+	}
+
+	res = construct_array(
+					entries,
+					nentries,
+					TEXTOID,
+					-1,
+					false,
+					'i'
+			);
+
+	PG_RETURN_POINTER(res);
+}
diff --git a/wildspeed.sql.in b/wildspeed.sql.in
new file mode 100644
index 0000000..a8c5cdb
--- /dev/null
+++ b/wildspeed.sql.in
@@ -0,0 +1,41 @@
+BEGIN;
+
+-- support functions for gin
+CREATE OR REPLACE FUNCTION gin_extract_permuted(text, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION wildcmp(text, text, bool)
+RETURNS int32
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION gin_extract_wildcard(text, internal, int2, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION gin_consistent_wildcard(internal, int2, text)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE;
+
+CREATE OPERATOR CLASS wildcard_ops
+FOR TYPE text USING gin
+AS
+    OPERATOR        1       ~~ RECHECK,
+	FUNCTION        1       wildcmp(text,text,bool),
+	FUNCTION        2       gin_extract_permuted(text, internal),
+	FUNCTION        3       gin_extract_wildcard(text, internal, int2, internal),
+	FUNCTION        4       gin_consistent_wildcard(internal, int2, text),
+STORAGE         text;
+
+
+--debug function
+CREATE OR REPLACE FUNCTION permute(text)
+RETURNS _text
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT IMMUTABLE;
+
+COMMIT;