Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import json 

2import six 

3import re 

4 

# every *_SPLIT pattern finds one separator character that is NOT slash-escaped:
#   (?<!\\)    - the match may not begin right after a backslash
#   (?:\\\\)*  - consume any run of backslash pairs (escaped backslashes) in front of the separator,
#                so only an even number of backslashes can precede it
#   (\.)       - capture the actual separator as group 1 (`.`, `[`, `=` or `~` depending on the pattern)
PERIOD_SPLIT = re.compile(r'(?<!\\)(?:\\\\)*(\.)')
OPEN_SQUARE_BRACKET_SPLIT = re.compile(r'(?<!\\)(?:\\\\)*(\[)')
EQUAL_SPLIT = re.compile(r'(?<!\\)(?:\\\\)*(=)')
# NOTE: the name is spelled `TIDLE`, but the pattern splits on `~` (tilde).
TIDLE_SPLIT = re.compile(r'(?<!\\)(?:\\\\)*(~)')

# a backslash that is not directly preceded by another backslash
SINGLE_SLASH = re.compile(r'(?<!\\)(\\)')

14 

15 

def _non_quoted_split(regex, string):
    """
    split `string` on every separator that `regex` captures in its group 1.

    the pieces are returned exactly as they appear in `string`; nothing is un-escaped here. a string without any
    separator (including the empty string) comes back as a single-item list.

    :param regex: compiled pattern whose group 1 is the separator
    :param string: the text to split
    :type string: six.string_types
    :returns: the text found before, between and after the separators
    :rtype: list
    """
    pieces = []
    cursor = 0
    for separator in regex.finditer(string):
        # everything from the end of the previous separator up to this one.
        pieces.append(string[cursor:separator.start(1)])
        cursor = separator.end(1)
    # whatever is left after the last separator (the whole string when there was none).
    pieces.append(string[cursor:])
    return pieces

22 

23 

def _un_slash_escape(string):
    """
    remove one level of backslash escaping from `string`.

    :param string: slash-escaped text
    :type string: six.string_types
    :returns: the text with escaping backslashes removed and doubled backslashes collapsed to one
    :rtype: six.string_types
    """
    # first drop every backslash that does not directly follow another backslash ...
    without_leading_slashes = SINGLE_SLASH.sub('', string)
    # ... then collapse whatever doubled backslashes are still left into a single one.
    return without_leading_slashes.replace('\\\\', '\\')

26 

27 

def _get_next_mapped_value_for_key(mapped_value, key, found_value, path_parts, path_parts_index, path_parts_break):
    """
    step from `mapped_value` into `mapped_value[key]`.

    a string is first decoded as json. when the (decoded) value is not dict-like, or does not hold `key`, the value is
    returned as it stands together with a "not found" status and a request to stop walking the path.

    :param mapped_value: the value reached so far
    :param key: un-escaped key to look up
    :param found_value: found status so far, passed through on success
    :param path_parts: all the parts of the path, used for the error message
    :type path_parts: list
    :param path_parts_index: index of the part currently being resolved
    :type path_parts_index: int
    :param path_parts_break: break flag so far, passed through on success
    :returns: found status, the next value (or the current one on a miss), whether to stop walking the path
    :rtype: tuple
    :raises ValueError: if a string is found where a dict-like object is needed and it is not valid json.
    """
    if isinstance(mapped_value, six.string_types):
        # ugh, maybe it is json?
        try:
            mapped_value = json.loads(mapped_value)
        except ValueError:
            raise ValueError(
                'string found when looking for dict-like object at {!r}. failed to convert to json.'.format(
                    '.'.join(path_parts[:path_parts_index])
                )
            )

    if not hasattr(mapped_value, 'keys'):
        # not dict-like: nothing to look the key up in.
        return False, mapped_value, True

    try:
        next_value = mapped_value[key]
    except KeyError:
        return False, mapped_value, True
    return found_value, next_value, path_parts_break

49 

50 

def _array_part_is_digit(mapped_value, array_part, key, path_parts, path_parts_index):
    """
    resolve a `[0]` style array part: return the item of `mapped_value` at index `int(array_part)`.

    :param mapped_value: the array to index
    :param array_part: the digits found between the square brackets
    :type array_part: six.string_types
    :param key: un-escaped key the brackets were attached to, used for error messages
    :param path_parts: all the parts of the path, used for error messages
    :type path_parts: list
    :param path_parts_index: index of the part currently being resolved
    :type path_parts_index: int
    :returns: the item at the requested index
    :raises ValueError: if `mapped_value` is dict-like instead of an array.
    :raises IndexError: if the index is out of range.
    """
    def location():
        # path to the array, only built when an error has to be reported.
        return '.'.join(path_parts[:path_parts_index] + [key])

    # detect dict-like objects the same way the `[]` handling does. waiting for a KeyError would let a dict
    # that happens to hold the integer key slip through as if it were an array.
    if hasattr(mapped_value, 'keys'):
        raise ValueError('array expected at {!r}, found dict-like object.'.format(location()))

    index = int(array_part)
    try:
        return mapped_value[index]
    except KeyError:
        # mapping-like object without a `keys` attribute.
        raise ValueError('array expected at {!r}, found dict-like object.'.format(location()))
    except IndexError:
        raise IndexError('index {!r} out of range on array at {!r}.'.format(index, location()))

65 

66 

def _array_part_is_key_or_sub_key_equal(found_value, mapped_value, array_part, path_parts_break):
    """
    resolve a `[Key=Value]` or `[Key~SubKey=Value]` array part: pick the first item whose (sub) key equals `Value`.

    a dict-like `mapped_value` is treated as an array holding that single item. a `Value` made only of digits is
    compared as an `int`; wrap it in double quotes to compare it as a string.

    :param found_value: found status so far, passed through on a match
    :param mapped_value: the array (or single dict-like item) to search
    :param array_part: the text found between the square brackets
    :type array_part: six.string_types
    :param path_parts_break: break flag so far, passed through on a match
    :returns: found status, the matching item (or `mapped_value` untouched on a miss), whether to stop handling
        array parts, whether to stop walking the path
    :rtype: tuple
    :raises ValueError: if the brackets hold no unquoted equals sign, or more than one.
    """
    array_part_break = False
    # split on non quoted equals signs
    equal_parts = _non_quoted_split(EQUAL_SPLIT, array_part)
    find_key = equal_parts[0]
    find_value = equal_parts[1:]
    # future: when dropping python 2 support do this instead.
    # find_key, *find_value = _non_quoted_split(EQUAL_SPLIT, array_part)
    if not find_value:
        # every equals sign was slash-escaped, so there is no value to compare against.
        raise ValueError('Bad square brackets syntax on {!r}'.format(array_part))
    if len(find_value) >= 2:
        raise ValueError('too many unquoted equals signs in square brackets for {!r}'.format(array_part))
    find_value = find_value[0]
    if find_value.isdigit():
        find_value = int(find_value)
    elif len(find_value) >= 2 and find_value.startswith('"') and find_value.endswith('"'):
        # a lone `"` is a value, not an (empty) quoted string.
        find_value = find_value[1:-1]
    if isinstance(find_value, six.string_types):
        find_value = _un_slash_escape(find_value)

    # the key chain is the same for every item, so split and un-escape it only once.
    sub_keys = [_un_slash_escape(sub_key) for sub_key in _non_quoted_split(TIDLE_SPLIT, find_key)]

    for item in [mapped_value] if hasattr(mapped_value, 'keys') else mapped_value:
        sub_item = item
        try:
            for sub_key in sub_keys:
                sub_item = sub_item[sub_key]
        except (KeyError, IndexError, TypeError):
            # TypeError: the item (or something along the key chain) can not be looked into with this key,
            # e.g. a string or a number. that is simply not a match.
            continue
        if sub_item == find_value:
            mapped_value = item
            break
    else:
        # raise KeyError('no item with %r == %r' % (find_key, find_value))
        found_value = False
        path_parts_break = True  # break the outer loop, we are done here.
        array_part_break = True
    return found_value, mapped_value, array_part_break, path_parts_break

104 

105 

def _array_part_is_whole_array(found_value, mapped_value, key, path_parts, path_parts_index):
    """
    resolve an empty `[]` array part: apply the rest of the path to every item of the array.

    when nothing follows `key[]` in the path the array itself is the result. otherwise the remaining path parts are
    resolved against each item and only the values that were found are kept.

    :param found_value: found status so far
    :param mapped_value: the array to fan out over
    :param key: un-escaped key the brackets were attached to, used for the error message
    :param path_parts: all the parts of the path
    :type path_parts: list
    :param path_parts_index: index of the part currently being resolved
    :type path_parts_index: int
    :returns: found status, the array or the list of values found in its items
    :rtype: tuple
    :raises ValueError: if `mapped_value` is dict-like instead of an array.
    """
    if hasattr(mapped_value, 'keys'):
        raise ValueError('array expected at {!r}, found dict-like object.'.format(
            '.'.join(path_parts[:path_parts_index] + [key])
        ))

    remaining_parts = path_parts[path_parts_index + 1:]
    if not remaining_parts:
        # `key[]` is the end of the path. resolving an empty path against each item would look up the key ''
        # in it (and report a miss, or fail to json-decode a string item), so hand back the array as it is.
        return found_value, mapped_value

    if not mapped_value:
        # there are no items to find the rest of the path in.
        return False, mapped_value

    remainder = '.'.join(remaining_parts)
    resolved = [resolve_path_to_value(item, remainder) for item in mapped_value]
    values = [value for found, value in resolved if found]
    if not values:
        found_value = False
    return found_value, values

122 

123 

def _strip_closing_brackets(array_part_raw):
    r"""
    remove the closing `]` from the text that followed an opening `[`.

    like `str.strip(']')` every leading and trailing `]` is dropped, except that a trailing `\]` (a bracket escaped
    by an odd number of backslashes) is content and is kept.

    :param array_part_raw: the text that followed an unescaped `[` inside a path part
    :type array_part_raw: six.string_types
    :returns: the content of the square brackets, still slash-escaped
    :rtype: six.string_types
    """
    array_part = array_part_raw.rstrip(']')
    if array_part != array_part_raw:
        trailing_slashes = len(array_part) - len(array_part.rstrip('\\'))
        if trailing_slashes % 2:
            # the first bracket that was stripped is slash-escaped: it belongs to the content.
            array_part += ']'
    return array_part.lstrip(']')


def resolve_path_to_value(source, path):
    r"""
    fetch a value out of `source` using `path` as the pointer to the desired value.

    a `path` should be in one of or a combination of the following formats:
        - dictionary keys using dot notation
            key.subkey
        - array item using square bracket notation
            key[0]
        - find dict in array using keys
            key[Key=Value]
        - find dict in array using sub keys
            key[Key~SubKey=Value]
        - every item of an array, the rest of the path is applied to each item
            key[].subkey

    if the substring `Value` `isdigit()`, we look for an `int` version. You can wrap `'8'` into `'"8"'` to find the
    `string` version.

    the separators `.`, `[`, `]`, `=` and `~` can be used literally by escaping them with a backslash, a literal
    backslash is written as two backslashes.

    a string found where a dict-like object is needed is decoded as json first.

    examples:
    >>> source_dict = {
    ...     'first_key': 'a',
    ...     'second_key' : ['x', 'y', 'z'],
    ...     'third_key' : [
    ...         {'c': 'asdf'},
    ...         {'b': 3},
    ...         {'b': '5'},
    ...         {'h': 'qw"er'}
    ...     ],
    ...     'fourth_key': [
    ...         {
    ...             'd': {'f': 5, 'g': 6},
    ...             'e': {'f': 7, 'g': 8}
    ...         },
    ...         {
    ...             'd': {'f': 9, 'g': 10},
    ...             'e': {'f': 11, 'g': 12}
    ...         }
    ...     ],
    ...     'fifth_key': [
    ...         {'b.c': '9.a'},
    ...         {'b[c': '9[a'},
    ...         {'b]c': '9]a'},
    ...         {'b\c': '9\\a'},
    ...     ],
    ...     'sixth_key': {
    ...         'a': [
    ...             {'b':6},
    ...             {'b':5},
    ...             {'b':4},
    ...         ],
    ...         'c': [
    ...             {'d':100},
    ...             {'d':{'e': 3}},
    ...             {'d':{'e': 2}},
    ...         ],
    ...         'f': []
    ...     },
    ...     'seventh_key': {
    ...         'bad_api': '{"z":1,"y":2,"x":3}',
    ...         'bad_json': '{"z":1!"y":2,"x":3}',
    ...     }
    ... }
    >>> resolve_path_to_value(source_dict, 'zero_key')[0]
    False
    >>> resolve_path_to_value(source_dict, 'first_key')
    (True, 'a')
    >>> resolve_path_to_value(source_dict, 'second_key[1]')
    (True, 'y')
    >>> resolve_path_to_value(source_dict, 'second_key[4]')
    Traceback (most recent call last):
        ...
    IndexError: index 4 out of range on array at 'second_key'.
    >>> resolve_path_to_value(source_dict, 'third_key[b=3]')
    (True, {'b': 3})
    >>> resolve_path_to_value(source_dict, 'third_key[b=4]')[0]
    False
    >>> resolve_path_to_value(source_dict, 'third_key[b="5"]')
    (True, {'b': '5'})
    >>> resolve_path_to_value(source_dict, 'third_key[h=qw"er]')
    (True, {'h': 'qw"er'})
    >>> resolve_path_to_value(source_dict, 'third_key[c=asdf].c')
    (True, 'asdf')
    >>> resolve_path_to_value(source_dict, 'third_key[c=asdf].b')
    (False, {'c': 'asdf'})
    >>> resolve_path_to_value(source_dict, 'fourth_key[d~g=6].e.f')
    (True, 7)
    >>> resolve_path_to_value(source_dict, r'fifth_key[b\.c=9\.a].b\.c')
    (True, '9.a')
    >>> resolve_path_to_value(source_dict, r'fifth_key[b\[c=9\[a].b\[c')
    (True, '9[a')
    >>> resolve_path_to_value(source_dict, r'fifth_key[b\]c=9\]a].b\]c')
    (True, '9]a')
    >>> resolve_path_to_value(source_dict, r'fifth_key[b\\c=9\\a].b\\c')
    (True, '9\\a')
    >>> resolve_path_to_value({'k': [{'b': 'x]'}]}, r'k[b=x\]].b')
    (True, 'x]')
    >>> resolve_path_to_value(source_dict, 'sixth_key.a[].b')
    (True, [6, 5, 4])
    >>> resolve_path_to_value(source_dict, 'sixth_key.c[].d.e')
    (True, [3, 2])
    >>> resolve_path_to_value(source_dict, 'sixth_key.c[].x')
    (False, [])
    >>> resolve_path_to_value(source_dict, 'sixth_key.f')
    (True, [])
    >>> resolve_path_to_value(source_dict, 'sixth_key.f[]')
    (True, [])
    >>> resolve_path_to_value(source_dict, 'sixth_key.f[].g')
    (False, [])
    >>> resolve_path_to_value(source_dict, 'seventh_key.bad_api.x')
    (True, 3)
    >>> results = resolve_path_to_value(source_dict, 'seventh_key.bad_api.a')
    >>> results[0]
    False
    >>> results[1] == {'x': 3, 'y': 2, 'z': 1}
    True
    >>> resolve_path_to_value(source_dict, 'seventh_key.bad_api[bad-squares]')
    Traceback (most recent call last):
        ...
    ValueError: Bad square brackets syntax on 'bad-squares'
    >>> resolve_path_to_value(source_dict, 'seventh_key.bad_api[a=b=c=]')
    Traceback (most recent call last):
        ...
    ValueError: too many unquoted equals signs in square brackets for 'a=b=c='
    >>> resolve_path_to_value(source_dict, 'seventh_key[0]')
    Traceback (most recent call last):
        ...
    ValueError: array expected at 'seventh_key', found dict-like object.
    >>> resolve_path_to_value(source_dict, 'seventh_key[]')
    Traceback (most recent call last):
        ...
    ValueError: array expected at 'seventh_key', found dict-like object.
    >>> resolve_path_to_value(source_dict, 'seventh_key.bad_json.z')
    Traceback (most recent call last):
        ...
    ValueError: string found when looking for dict-like object at 'seventh_key.bad_json'. failed to convert to json.

    :param source: potentially holds the desired value
    :type source: dict
    :param path: points to the desired value
    :type path: six.string_types
    :returns: a boolean indicating found status, the value that was found
    :rtype: tuple
    :raises ValueError: if we don't understand what went inside some square brackets.
    :raises IndexError: if a `[0]` style index is out of range.
    """
    mapped_value = source
    found_value = True
    path_parts_break = False

    path_parts = _non_quoted_split(PERIOD_SPLIT, path)

    for path_parts_index, path_part_raw in enumerate(path_parts):
        # split on non quoted open bracket

        parts = _non_quoted_split(OPEN_SQUARE_BRACKET_SPLIT, path_part_raw)
        key = parts[0]
        array = parts[1:]
        # future: when dropping python 2 support do this instead.
        # key, *array = _non_quoted_split(OPEN_SQUARE_BRACKET_SPLIT, path_part_raw)

        key = _un_slash_escape(key)
        found_value, mapped_value, path_parts_break = _get_next_mapped_value_for_key(
            mapped_value, key, found_value, path_parts, path_parts_index, path_parts_break
        )
        if path_parts_break:
            break
        for array_part_raw in array:
            # a plain strip(']') would also eat an escaped `\]` that ends the bracket content.
            array_part = _strip_closing_brackets(array_part_raw)
            if array_part.isdigit():
                mapped_value = _array_part_is_digit(
                    mapped_value, array_part, key, path_parts, path_parts_index
                )
            elif '=' in array_part:
                found_value, mapped_value, array_part_break, path_parts_break = _array_part_is_key_or_sub_key_equal(
                    found_value, mapped_value, array_part, path_parts_break
                )
                if array_part_break:
                    break
            elif array_part == '':
                # the rest of the path is resolved per item by the helper, so nothing is left to walk here.
                found_value, mapped_value = _array_part_is_whole_array(
                    found_value, mapped_value, key, path_parts, path_parts_index
                )
                path_parts_break = True  # break the outer loop, we are done here.
                break
            else:
                raise ValueError('Bad square brackets syntax on {!r}'.format(array_part))
        if path_parts_break:
            break
    return found_value, mapped_value

309 

310 

def resolve_mapping_to_dict(mapping, source):
    """
    build a new dict out of `source`: each key of `mapping` receives the value its path points to.
    paths that do not lead to a value are left out of the result.
    see resolve_path_to_value for path string formats.

    >>> mapping = {
    ...     'a': 'x[type=other_type].aa',
    ...     'b': 'x[type=some_type].bb',
    ...     'c': 'x[type=other_type].cc',
    ... }
    >>> source = {
    ...     'x': [
    ...         {
    ...             'type': 'some_type',
    ...             'aa': '4',
    ...             'bb': '5',
    ...             'cc': '6'
    ...         },
    ...         {
    ...             'type': 'other_type',
    ...             'aa': '1',
    ...             'bb': '2',
    ...             'cc': '3'
    ...         }
    ...     ]
    ... }
    >>> resolve_mapping_to_dict(mapping, source) == {'a': '1', 'b': '5', 'c': '3'}
    True

    :param mapping: values are paths to find the corresponding value in `source`, keys are where to store said values
    :type mapping: dict
    :param source: potentially holds the desired values
    :type source: dict
    :returns: destination dict, containing any found values
    :rtype: dict
    """
    resolved = (
        (destination_key, resolve_path_to_value(source, path))
        for destination_key, path in mapping.items()
    )
    # every lookup yields (found, value): keep the value only when it was found.
    return {
        destination_key: mapped_value
        for destination_key, (found_value, mapped_value) in resolved
        if found_value
    }