--- /dev/null
+WildSpeed - fast wildcard search for LIKE operator
+
+ Wildspeed extension provides GIN index support for wildcard search
+ for LIKE operator.
+
+ Online version of this document is available
+ http://www.sai.msu.su/~megera/wiki/wildspeed
+
+Authors
+
+ * Oleg Bartunov <oleg@sai.msu.su>, Moscow, Moscow University, Russia
+ * Teodor Sigaev <teodor@sigaev.ru>, Moscow, Moscow University,Russia
+
+License
+
+ Stable version, included into PostgreSQL distribution, released under
+ BSD license. Development version, available from this site, released
+ under the GNU General Public License, version 2 (June 1991)
+
+Downloads
+
+ Stable version of wildspeed is available from
+ http://www.sigaev.ru/cvsweb/cvsweb.cgi/wildspeed/
+
+Installation
+
+% cd PGSQLSRC/contrib
+% tar xzvf wildspeed.tar.gz
+% make
+% make install
+% make installcheck
+% psql DB < wildspeed.sql
+
+ Wildspeed provides opclass (wildcard_ops) and uses partial match
+ feature of GIN, available since 8.4. Also, it supports full index scan.
+
+ The size of index can be very big, since it contains entries for all
+ permutations of the original word, see [1] for details. For example,
+ word hello will be indexed as well as its all permutations:
+=# select permute('hello');
+ permute
+--------------------------------------
+ {hello$,ello$h,llo$he,lo$hel,o$hell}
+
+ Notice, symbol '$' is used only for visualization, in actual
+ implementation null-symbol '\0' is used.
+
+ Search query rewritten as prefix search:
+*X -> X$*
+X*Y -> Y$X*
+*X* -> X*
+
+ For example, search for 'hel*o' will be rewritten as 'o$hel'.
+
+ Special function permute(TEXT), which returns all permutations of
+ argument, provided for test purposes.
+
+ Performance of wildspeed depends on search pattern. Basically,
+ wildspeed is less effective than btree index with text_pattern_ops for
+ prefix search (the difference is greatly reduced for long prefixes) and
+ much faster for wildcard search.
+
+ Wildspeed by default uses optimization (skip short patterns if there
+ are long one), which can be turned off in Makefile by removing define
+ -DOPTIMIZE_WILDCARD_QUERY.
+
+References
+
+ 1. http://www.cs.wright.edu/~tkprasad/courses/cs499/L05TolerantIR.ppt, see also,
+ http://nlp.stanford.edu/IR-book/html/htmledition/permuterm-indexes-1.html
+
+Examples
+
+ Table words contains 747358 records, w1 and w2 columns contains the
+ same data in order to test performance of Btree (w1) and GIN (w2)
+ indexes:
+ Table "public.words"
+ Column | Type | Modifiers
+--------+------+-----------
+ w1 | text |
+ w2 | text |
+
+words=# create index bt_idx on words using btree (w1 text_pattern_ops);
+CREATE INDEX
+Time: 1885.195 ms
+words=# create index gin_idx on words using gin (w2 wildcard_ops);
+vacuum analyze;
+CREATE INDEX
+Time: 530351.223 ms
+
+Size:
+
+words=# select pg_relation_size('words');
+ pg_relation_size
+------------------
+ 43253760
+
+words=# select pg_relation_size('gin_idx');
+ pg_relation_size
+------------------
+ 417816576
+(1 row)
+
+words=# select pg_relation_size('bt_idx');
+ pg_relation_size
+------------------
+ 23437312
+(1 row)
+
+ Prefix search:
+words=# select count(*) from words where w1 like 'a%';
+ count
+-------
+ 15491
+(1 row)
+
+Time: 7.502 ms
+words=# select count(*) from words where w2 like 'a%';
+ count
+-------
+ 15491
+(1 row)
+
+Time: 31.152 ms
+
+ Wildcard search:
+words=# select count(*) from words where w1 like '%asd%';
+ count
+-------
+ 26
+(1 row)
+
+Time: 147.308 ms
+words=# select count(*) from words where w2 like '%asd%';
+ count
+-------
+ 26
+(1 row)
+
+Time: 0.339 ms
+
+ Full index scan:
+words=# set enable_seqscan to off;
+words=# explain analyze select count(*) from words where w2 like '%';
+ QUERY PLAN
+
+--------------------------------------------------------------------------------------------------------------
+-----------------------------
+ Aggregate (cost=226274.98..226274.99 rows=1 width=0) (actual time=2218.709..2218.709 rows=1 loops=1)
+ -> Bitmap Heap Scan on words (cost=209785.73..224406.77 rows=747283 width=0) (actual time=1510.516..1913.
+430 rows=747358 loops=1)
+ Filter: (w2 ~~ '%'::text)
+ -> Bitmap Index Scan on gin_idx (cost=0.00..209598.91 rows=747283 width=0) (actual time=1509.358..1
+509.358 rows=747358 loops=1)
+ Index Cond: (w2 ~~ '%'::text)
+ Total runtime: 2218.747 ms
+(6 rows)
+
--- /dev/null
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "utils/array.h"
+
+/*
+ * MARK_SIGN is a sign of end of string and this character should
+ * be regular character. The best candidate of this is a zero byte
+ * which is accepted for any locale used by postgres. But it's impossible
+ * to show it, so we will replace it to another one (MARK_SIGN_SHOW) which
+ * can be noticed well. But we can't use it as mark because it's allowed
+ * to be inside string.
+ */
+
+#define MARK_SIGN '\0'
+#define MARK_SIGN_SHOW '$'
+
+
+#define WC_BEGIN 0x01 /* should be in begining of string */
+#define WC_MIDDLE 0x02 /* should be in middle of string */
+#define WC_END 0x04 /* should be in end of string */
+
+PG_MODULE_MAGIC;
+
+static text*
+appendStrToText( text *src, char *str, int32 len, int32 maxlen )
+{
+ int32 curlen;
+
+ if (src == NULL )
+ {
+ Assert( maxlen >= 0 );
+ src = (text*)palloc( VARHDRSZ + sizeof(char*) * maxlen );
+ SET_VARSIZE(src, 0 + VARHDRSZ);
+ }
+
+ curlen = VARSIZE(src) - VARHDRSZ;
+
+ if (len>0)
+ memcpy( VARDATA(src) + curlen, str, len );
+
+ SET_VARSIZE(src, curlen + len + VARHDRSZ);
+
+ return src;
+}
+
+static text*
+appendMarkToText( text *src, int32 maxlen )
+{
+ char sign = MARK_SIGN;
+
+ return appendStrToText( src, &sign, 1, maxlen );
+}
+
+static text*
+setFlagOfText( char flag, int32 maxlen )
+{
+ char flagstruct[2];
+
+ Assert( maxlen > 0 );
+ /*
+ * Mark text by setting first byte to MARK_SIGN to indicate
+ * that text has flags. It's a safe for non empty string,
+ * because first character can not be a MARK_SIGN (see
+ * gin_extract_permuted() )
+ */
+
+ flagstruct[0] = MARK_SIGN;
+ flagstruct[1] = flag;
+
+ return appendStrToText(NULL, flagstruct, 2, maxlen );
+}
+
+PG_FUNCTION_INFO_V1(gin_extract_permuted);
+Datum gin_extract_permuted(PG_FUNCTION_ARGS);
+Datum
+gin_extract_permuted(PG_FUNCTION_ARGS)
+{
+ text *src = PG_GETARG_TEXT_P(0);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ Datum *entries = NULL;
+ int32 srclen = pg_mbstrlen_with_len(VARDATA(src), VARSIZE(src) - VARHDRSZ);
+
+ *nentries = srclen;
+
+ if ( srclen == 0 )
+ {
+ /*
+ * Empty string is encoded by alone MARK_SIGN character
+ */
+ *nentries = 1;
+ entries = (Datum*) palloc(sizeof(Datum));
+ entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
+ }
+ else
+ {
+ text *dst;
+ int32 i,
+ offset=0; /* offset to current position in src in bytes */
+ int32 nbytes = VARSIZE(src) - VARHDRSZ;
+ char *srcptr = VARDATA(src);
+
+ /*
+ * Permutation: hello will be permuted to hello$, ello$h, llo$he, lo$hel, o$hell.
+ * So, number of entries is equial to number of characters (not a bytes)
+ */
+
+ entries = (Datum*)palloc(sizeof(char*) * nbytes );
+ for(i=0; i<srclen;i++) {
+
+ /*
+ * Copy first part. For llo$he it will be 'llo'
+ */
+ dst = appendStrToText( NULL, srcptr + offset, nbytes - offset, nbytes + 1 );
+
+ /*
+ * Set mark sign ($)
+ */
+ dst = appendMarkToText( dst, -1 );
+
+ /*
+ * Copy rest of string (in example above 'he')
+ */
+ dst = appendStrToText( dst, srcptr, offset, -1 );
+
+ entries[i] = PointerGetDatum(dst);
+ offset += pg_mblen( srcptr + offset );
+ }
+ }
+
+ PG_FREE_IF_COPY(src,0);
+ PG_RETURN_POINTER(entries);
+}
+
+PG_FUNCTION_INFO_V1(wildcmp);
+Datum wildcmp(PG_FUNCTION_ARGS);
+Datum
+wildcmp(PG_FUNCTION_ARGS)
+{
+ text *a = PG_GETARG_TEXT_P(0);
+ text *b = PG_GETARG_TEXT_P(1);
+ bool partialMatch = PG_GETARG_BOOL(2);
+ int32 cmp;
+ int lena,
+ lenb;
+ char *ptra = VARDATA(a),
+ *ptrb = VARDATA(b);
+ char flag = 0;
+
+ lena = VARSIZE(a) - VARHDRSZ;
+ lenb = VARSIZE(b) - VARHDRSZ;
+
+ /*
+ * sets correct pointers and lengths in case of flags
+ * presence
+ */
+ if ( lena > 2 && *ptra == MARK_SIGN )
+ {
+ flag = *(ptra+1);
+ ptra+=2;
+ lena-=2;
+
+ if ( lenb > 2 && *ptrb == MARK_SIGN )
+ {
+ /*
+ * If they have different flags then they can not be equal, this
+ * place works only during check of equality of keys
+ * to search
+ */
+ if ( flag != *(ptrb+1) )
+ return 1;
+ ptrb+=2;
+ lenb-=2;
+
+ /* b can not be a product of gin_extract_wildcard for partial match mode */
+ Assert( partialMatch == false );
+ }
+ }
+ else if ( lenb > 2 && *ptrb == MARK_SIGN )
+ {
+ /* b can not be a product of gin_extract_wildcard for partial match mode */
+ Assert( partialMatch == false );
+
+ ptrb+=2;
+ lenb-=2;
+ }
+
+ if ( lena == 0 )
+ {
+ if ( partialMatch )
+ cmp = 0; /* full scan for partialMatch*/
+ else
+ cmp = (lenb>0) ? -1 : 0;
+ }
+ else
+ {
+ /*
+ * We couldn't use strcmp because of MARK_SIGN
+ */
+ cmp = memcmp(ptra, ptrb, Min(lena, lenb));
+
+ if ( partialMatch )
+ {
+ if ( cmp == 0 )
+ {
+ if ( lena > lenb )
+ {
+ /*
+ * b argument is not beginning with argument a
+ */
+ cmp = 1;
+ }
+ else if ( flag > 0 && lenb>lena /* be safe */ )
+ { /* there is some flags to check */
+ char actualFlag;
+
+ if ( ptrb[ lenb - 1 ] == MARK_SIGN )
+ actualFlag = WC_BEGIN;
+ else if ( ptrb[ lena ] == MARK_SIGN )
+ actualFlag = WC_END;
+ else
+ actualFlag = WC_MIDDLE;
+
+ if ( (flag & actualFlag) == 0 )
+ {
+ /*
+ * Prefix are matched but this prefix s not placed as needed.
+ * so we should give a smoke signal to GIN that we don't want
+ * this match, but wish to continue scan
+ */
+ cmp = -1;
+ }
+ }
+ }
+ else if (cmp < 0)
+ {
+ cmp = 1; /* prevent continue scan */
+ }
+ }
+ else if ( (cmp == 0) && (lena != lenb) )
+ {
+ cmp = (lena < lenb) ? -1 : 1;
+ }
+ }
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+ PG_RETURN_INT32( cmp );
+}
+
+#ifdef OPTIMIZE_WILDCARD_QUERY
+
+typedef struct
+{
+ Datum entry;
+ int32 len;
+ char flag;
+} OptItem;
+
+
+/*
+ * Function drops most short search word to speedup
+ * index search by preventing use word which gives
+ * a lot of matches
+ */
+static void
+optimize_wildcard_search( Datum *entries, int32 *nentries )
+{
+ int32 maxlen=0;
+ OptItem *items;
+ int i, nitems = *nentries;
+ char *ptr,*p;
+
+ items = (OptItem*)palloc( sizeof(OptItem) * (*nentries) );
+ for(i=0;i<nitems;i++)
+ {
+ items[i].entry = entries[i];
+ items[i].len = VARSIZE(entries[i]) - VARHDRSZ;
+ ptr = VARDATA(entries[i]);
+
+ if ( items[i].len > 2 && *ptr == MARK_SIGN )
+ {
+ items[i].len-=2;
+ items[i].flag = *(ptr+1);
+ }
+ else
+ {
+ items[i].flag = 0;
+ if ( items[i].len > 1 && (p=strchr(ptr, MARK_SIGN)) != NULL )
+ {
+ if ( p == ptr + items[i].len -1 )
+ items[i].flag = WC_BEGIN;
+ else
+ items[i].flag = WC_BEGIN | WC_END;
+ }
+ }
+
+ if ( items[i].len > maxlen )
+ maxlen = items[i].len;
+ }
+
+ *nentries=0;
+
+ for(i=0;i<nitems;i++)
+ {
+ if ( (items[i].flag & WC_BEGIN) && (items[i].flag & WC_END) )
+ { /* X$Y use always */
+ entries[ *nentries ] = items[i].entry;
+ (*nentries)++;
+ }
+ else if ( (items[i].flag & WC_MIDDLE) == 0 )
+ {
+ /*
+ * for begin-only or end-only word we set more low limit than for
+ * other variants
+ */
+ if ( 3*items[i].len > maxlen )
+ {
+ entries[ *nentries ] = items[i].entry;
+ (*nentries)++;
+ }
+ }
+ else if ( 2*items[i].len > maxlen )
+ {
+ /*
+ * use only items with biggest length
+ */
+ entries[ *nentries ] = items[i].entry;
+ (*nentries)++;
+ }
+ }
+
+ Assert( *nentries>0 );
+
+}
+#endif
+
+typedef struct
+{
+ bool iswildcard;
+ int32 len;
+ char *ptr;
+} WildItem;
+
+PG_FUNCTION_INFO_V1(gin_extract_wildcard);
+Datum gin_extract_wildcard(PG_FUNCTION_ARGS);
+Datum
+gin_extract_wildcard(PG_FUNCTION_ARGS)
+{
+ text *q = PG_GETARG_TEXT_P(0);
+ int32 lenq = VARSIZE(q) - VARHDRSZ;
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+#ifdef NOT_USED
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+#endif
+ bool *partialmatch,
+ **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
+ Datum *entries = NULL;
+ char *qptr = VARDATA(q);
+ int clen,
+ splitqlen = 0,
+ i;
+ WildItem *items;
+ text *entry;
+
+ *nentries = 0;
+
+ if ( lenq == 0 )
+ {
+ partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool));
+ *nentries = 1;
+ entries = (Datum*) palloc(sizeof(Datum));
+ entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
+
+ PG_RETURN_POINTER(entries);
+ }
+
+ partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool) * lenq);
+ entries = (Datum*) palloc(sizeof(Datum) * lenq);
+ items=(WildItem*) palloc0( sizeof(WildItem) * lenq );
+
+
+ /*
+ * Parse expression to the list of constant parts and
+ * wildcards
+ */
+ while( qptr - VARDATA(q) < lenq )
+ {
+ clen = pg_mblen(qptr);
+
+ if ( clen==1 && (*qptr == '_' || *qptr == '%' ) )
+ {
+ if ( splitqlen == 0 )
+ {
+ items[ splitqlen ].iswildcard = true;
+ splitqlen++;
+ }
+ else if ( items[ splitqlen-1 ].iswildcard == false )
+ {
+ items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
+ items[ splitqlen ].iswildcard = true;
+ splitqlen++;
+ }
+ /*
+ * ignore wildcard, because we don't make difference beetween
+ * %, _ or a combination of its
+ */
+ }
+ else
+ {
+ if ( splitqlen == 0 || items[ splitqlen-1 ].iswildcard == true )
+ {
+ items[ splitqlen ].ptr = qptr;
+ splitqlen++;
+ }
+ }
+ qptr += clen;
+ }
+
+ Assert( splitqlen >= 1 );
+ if ( items[ splitqlen-1 ].iswildcard == false )
+ items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
+
+ if ( items[ 0 ].iswildcard == false )
+ {
+ /* X... */
+ if ( splitqlen == 1 )
+ {
+ /* X => X$, exact match */
+ *nentries = 1;
+ entry = appendStrToText(NULL, items[ 0 ].ptr, items[ 0 ].len, lenq+1);
+ entry = appendMarkToText( entry, -1 );
+ entries[0] = PointerGetDatum( entry );
+ }
+ else if ( items[ splitqlen-1 ].iswildcard == false )
+ {
+ /* X * [X1 * [] ] ] Y => Y$X* [ + X1* [] ] */
+
+ *nentries = 1;
+ entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
+ entry = appendMarkToText( entry, -1 );
+ entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
+ partialmatch[0] = true;
+ entries[0] = PointerGetDatum( entry );
+
+ for(i=1; i<splitqlen-1; i++)
+ {
+ if ( items[ i ].iswildcard )
+ continue;
+ entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1 );
+ partialmatch[ *nentries ] = true;
+ entries[ *nentries ] = PointerGetDatum( entry );
+ (*nentries)++;
+ }
+ }
+ else
+ {
+ /* X * [ X1 * [] ] => X*$ [ + X1* [] ] */
+
+ entry = setFlagOfText( WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
+ *nentries = 1;
+ partialmatch[ 0 ] = true;
+ entries[0] = PointerGetDatum( entry );
+
+ for(i=2; i<splitqlen-1; i++)
+ {
+ if ( items[ i ].iswildcard )
+ continue;
+ entry = setFlagOfText( (i==splitqlen-2) ? (WC_MIDDLE | WC_END) : WC_MIDDLE,
+ lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
+ partialmatch[ *nentries ] = true;
+ entries[ *nentries ] = PointerGetDatum( entry );
+ (*nentries)++;
+ }
+ }
+ }
+ else
+ {
+ /* *... */
+
+ if ( splitqlen == 1 )
+ {
+ /* any word => full scan */
+ *nentries = 1;
+ entry = appendStrToText(NULL, "", 0, lenq+1);
+ partialmatch[0] = true;
+ entries[0] = PointerGetDatum( entry );
+ }
+ else if ( items[ splitqlen-1 ].iswildcard == false )
+ {
+ /* * [ X1 * [] ] X => X$* [ + X1* [] ] */
+ *nentries = 1;
+ entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
+ entry = appendMarkToText( entry, -1 );
+ partialmatch[0] = true;
+ entries[0] = PointerGetDatum( entry );
+
+ for(i=1; i<splitqlen-1; i++)
+ {
+ if ( items[ i ].iswildcard )
+ continue;
+ entry = setFlagOfText( (i==1) ? (WC_MIDDLE | WC_BEGIN) : WC_MIDDLE,
+ lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
+ partialmatch[ *nentries ] = true;
+ entries[ *nentries ] = PointerGetDatum( entry );
+ (*nentries)++;
+ }
+ }
+ else
+ {
+ /* * X [ * X1 [] ] * => X* [ + X1* [] ] */
+ for(i=1; i<splitqlen-1; i++)
+ {
+ if ( items[ i ].iswildcard )
+ continue;
+
+ if ( splitqlen > 3 )
+ {
+ if ( i==1 )
+ entry = setFlagOfText( WC_MIDDLE | WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ else if ( i == splitqlen-2 )
+ entry = setFlagOfText( WC_MIDDLE | WC_END, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ else
+ entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
+ }
+ else
+ entry = NULL;
+ entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, lenq+1);
+ partialmatch[ *nentries ] = true;
+ entries[ *nentries ] = PointerGetDatum( entry );
+ (*nentries)++;
+ }
+ }
+ }
+
+ PG_FREE_IF_COPY(q,0);
+
+#ifdef OPTIMIZE_WILDCARD_QUERY
+ if ( *nentries > 1 )
+ optimize_wildcard_search( entries, nentries );
+#endif
+
+ PG_RETURN_POINTER(entries);
+}
+
+
+PG_FUNCTION_INFO_V1(gin_consistent_wildcard);
+Datum gin_consistent_wildcard(PG_FUNCTION_ARGS);
+Datum
+gin_consistent_wildcard(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+ bool res = true;
+ int i;
+ int32 nentries;
+
+ if ( fcinfo->flinfo->fn_extra == NULL )
+ {
+ bool *pmatch;
+
+ /*
+ * we need to get nentries, we'll get it by regular way
+ * and store it in function context
+ */
+
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(int32));
+
+ DirectFunctionCall4(
+ gin_extract_wildcard,
+ PG_GETARG_DATUM(2), /* query */
+ PointerGetDatum( fcinfo->flinfo->fn_extra ), /* &nentries */
+ PG_GETARG_DATUM(1), /* strategy */
+ PointerGetDatum( &pmatch )
+ );
+ }
+
+ nentries = *(int32*) fcinfo->flinfo->fn_extra;
+
+ for (i = 0; res && i < nentries; i++)
+ if (check[i] == false)
+ res = false;
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * Mostly debug fuction
+ */
+PG_FUNCTION_INFO_V1(permute);
+Datum permute(PG_FUNCTION_ARGS);
+Datum
+permute(PG_FUNCTION_ARGS)
+{
+ Datum src = PG_GETARG_DATUM(0);
+ int32 nentries = 0;
+ Datum *entries;
+ ArrayType *res;
+ int i;
+
+ /*
+ * Get permuted values by gin_extract_permuted()
+ */
+ entries = (Datum*) DatumGetPointer(DirectFunctionCall2(
+ gin_extract_permuted, src, PointerGetDatum(&nentries)
+ ));
+
+ /*
+ * We need to replace MARK_SIGN to MARK_SIGN_SHOW.
+ * See comments above near definition of MARK_SIGN and MARK_SIGN_SHOW.
+ */
+ if ( nentries == 1 && VARSIZE(entries[0]) == VARHDRSZ + 1)
+ {
+ *(VARDATA(entries[0])) = MARK_SIGN_SHOW;
+ }
+ else
+ {
+ int32 offset = 0; /* offset of MARK_SIGN */
+ char *ptr;
+
+ /*
+ * We scan array from the end because it allows simple calculation
+ * of MARK_SIGN position: on every iteration it's moved one
+ * character to the end.
+ */
+ for(i=nentries-1;i>=0;i--)
+ {
+ ptr = VARDATA(entries[i]);
+
+ offset += pg_mblen(ptr);
+ Assert( *(ptr + offset) == MARK_SIGN );
+ *(ptr + offset) = MARK_SIGN_SHOW;
+ }
+ }
+
+ res = construct_array(
+ entries,
+ nentries,
+ TEXTOID,
+ -1,
+ false,
+ 'i'
+ );
+
+ PG_RETURN_POINTER(res);
+}