bes Updated for version 3.20.13
BESRegex.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2005 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27//#define DODS_DEBUG
28
29#include "config.h"
30
31#if 0
32#ifndef WIN32
33#include <alloca.h>
34#endif
35#include <stdlib.h>
36
37#include <sys/types.h>
38#include <regex.h>
39
40#include <new>
41#include <string>
42#include <vector>
43#include <stdexcept>
44#endif
45
46#include <string>
47#include <vector>
48
49#include <regex.h>
50
51//#include <libdap/Error.h>
52#include <libdap/debug.h>
53#include <libdap/util.h>
54
55#include "BESError.h"
56#include "BESRegex.h"
57
58#if 0
59#include "util.h"
60#include "debug.h"
61#endif
62
63using namespace std;
64
65void
66BESRegex::init(const char *t)
67{
68#if !USE_CPP_11_REGEX
69 d_preg = static_cast<void*>(new regex_t);
70
71 int result = regcomp(static_cast<regex_t*>(d_preg), t, REG_EXTENDED);
72 if (result != 0) {
73 size_t msg_len = regerror(result, static_cast<regex_t*>(d_preg),
74 static_cast<char*>(nullptr),
75 static_cast<size_t>(0));
76
77 vector<char> msg(msg_len+1);
78 regerror(result, static_cast<regex_t*>(d_preg), msg.data(), msg_len);
79 string err = string("BESRegex error: ") + string(msg.data(), msg_len);
80 throw BESError(err, BES_SYNTAX_USER_ERROR, __FILE__, __LINE__);
81 }
82#else
83 d_exp = regex(t);
84#endif
85}
86
87#if 0
88void
89BESRegex::init(const string &t)
90{
91 d_exp = regex(t);
92}
93#endif
94
95#if !USE_CPP_11_REGEX
96BESRegex::~BESRegex()
97{
98 regfree(static_cast<regex_t*>(d_preg));
99 delete static_cast<regex_t*>(d_preg); d_preg = 0;
100}
101#endif
102
103#if 0
107BESRegex::BESRegex(const char* t)
108{
109 init(t);
110}
111
114BESRegex::BESRegex(const char* t, int)
115{
116 init(t);
117}
118#endif
119
126int
127BESRegex::match(const char *s, int len, int pos) const
128{
129#if !USE_CPP_11_REGEX
130 if (len > 32766) // Integer overflow protection
131 return -1;
132
133 regmatch_t *pmatch = new regmatch_t[len+1];
134 string ss = s;
135
136 int result = regexec(static_cast<regex_t*>(d_preg),
137 ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
138 int matchnum;
139 if (result == REG_NOMATCH)
140 matchnum = -1;
141 else
142 matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
143
144 delete[] pmatch; pmatch = 0;
145
146 return matchnum;
147#else
148 if (pos > len)
149 throw Error("Position exceed length in BESRegex::match()");
150
151 smatch match;
152 auto target = string(s+pos, len-pos);
153 bool found = regex_search(target, match, d_exp);
154 if (found)
155 return (int)match.length();
156 else
157 return -1;
158#endif
159}
160
166int
167BESRegex::match(const string &s) const
168{
169#if USE_CPP_11_REGEX
170 smatch match;
171 bool found = regex_search(s, match, d_exp);
172 if (found)
173 return (int)match.length();
174 else
175 return -1;
176#else
177 return match(s.c_str(), s.length(), 0);
178#endif
179}
180
191int
192BESRegex::search(const char *s, int len, int& matchlen, int pos) const
193{
194#if !USE_CPP_11_REGEX
195 // sanitize allocation
196 if (!libdap::size_ok(sizeof(regmatch_t), len+1))
197 return -1;
198
199 // alloc space for len matches, which is theoretical max.
200 // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
201 // then len+1 is a an integer overflow and this might be exploited by
202 // an attacker. It's not likely there will be more than a handful of
203 // matches, so I am going to limit this value to 32766. jhrg 3/4/09
204 if (len > 32766)
205 return -1;
206
207 regmatch_t *pmatch = new regmatch_t[len+1];
208 string ss = s;
209
210 int result = regexec(static_cast<regex_t*>(d_preg),
211 ss.substr(pos, len-pos).c_str(), len, pmatch, 0);
212 if (result == REG_NOMATCH) {
213 delete[] pmatch; pmatch = 0;
214 return -1;
215 }
216
217 // Match found, find the first one (pmatch lists the longest first)
218 int m = 0;
219 for (int i = 1; i < len; ++i)
220 if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
221 m = i;
222
223 matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
224 int matchpos = pmatch[m].rm_so;
225
226 delete[] pmatch; pmatch = 0;
227 return matchpos;
228#else
229 smatch match;
230 // This is needed because in C++14, the first arg to regex_search() cannot be a
231 // temporary string. It seems the C++11 compilers on some linux dists are using
232 // regex headers that enforce c++14 rules. jhrg 12/2/21
233 auto target = string(s+pos, len-pos);
234 bool found = regex_search(target, match, d_exp);
235 matchlen = (int)match.length();
236 if (found)
237 return (int)match.position();
238 else
239 return -1;
240#endif
241}
242
249int
250BESRegex::search(const string &s, int& matchlen) const
251{
252#if USE_CPP_11_REGEX
253 smatch match;
254 bool found = regex_search(s, match, d_exp);
255 matchlen = (int)match.length();
256 if (found)
257 return (int)match.position();
258 else
259 return -1;
260#else
261 // search(const char *s, int len, int& matchlen, int pos) const
262 return search(s.c_str(), s.length(), matchlen, 0);
263#endif
264}
265
Base exception class for the BES with basic string message.
Definition: BESError.h:59
BESRegex(const char *s)
initialize a BESRegex with a C string
Definition: BESRegex.h:77
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
int search(const char *s, int len, int &matchlen, int pos=0) const
Find the first match of the pattern in the string; returns its position and reports its length in matchlen.
Definition: BESRegex.cc:192