-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparse_minimum_fields.py
More file actions
398 lines (344 loc) · 14.4 KB
/
Copy pathparse_minimum_fields.py
File metadata and controls
398 lines (344 loc) · 14.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#!/usr/bin/env python3
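"""
Script to parse an xlsx file and extract entity dictionaries.
Reads column A until 'END_OF_FILE'. A row whose column A starts with 'entity:'
opens a new entity; the rows that follow become its fields, keyed by column A,
when column B (required) or column C (optional) contains a checkmark.
Each field records column D as its value, column F as its description and
column G as its example.
"""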
import argparse
import datetime
import os
import re
import sys
from pathlib import Path

import pandas as pd

from obp_dynamic_api import create_dynamic_entity_from_parsed
def _has_green_checkmark(cell_value):
"""
Check if a cell contains a green checkmark.
Args:
cell_value (str): The cell value to check
Returns:
bool: True if cell contains a green checkmark
"""
if not cell_value:
return False
# Common representations of green checkmarks
checkmarks = ['✓', '✔', '✅', '☑', '√', 'YES', 'Y', 'TRUE', '1']
cell_upper = cell_value.upper().strip()
return any(mark in cell_upper for mark in checkmarks)
# Characters outside A-Z a-z 0-9 underscore and hyphen are not allowed in field names.
_UNSAFE_FIELD_CHARS_RE = re.compile(r'[^A-Za-z0-9_\-]')
_REPEATED_UNDERSCORES_RE = re.compile(r'_+')
_ENTITY_PREFIX = "entity:"


def _cell_text(row, index):
    """Return the stripped string form of row[index], or "" if absent or NaN."""
    if len(row) <= index:
        return ""
    value = row.iloc[index]
    return str(value).strip() if pd.notna(value) else ""


def _example_text(row, index):
    """Return the example cell as text: dates as YYYY-MM-DD, surrounding quotes removed."""
    if len(row) <= index:
        return ""
    value = row.iloc[index]
    if pd.isna(value):
        return ""
    if isinstance(value, (pd.Timestamp, datetime.datetime, datetime.date)):
        try:
            text = value.strftime("%Y-%m-%d")
        except Exception:
            # Best effort: fall back to the default rendering rather than
            # failing the whole parse over one unformattable date.
            text = str(value).strip()
    else:
        text = str(value).strip()
    # Drop one pair of matching surrounding double or single quotes.
    if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
        text = text[1:-1].strip()
    return text


def _sanitize_field_name(raw_name):
    """Replace disallowed characters with "_", collapse runs of "_" and trim them."""
    replaced = _UNSAFE_FIELD_CHARS_RE.sub('_', raw_name)
    return _REPEATED_UNDERSCORES_RE.sub('_', replaced).strip('_')


def _build_field_entry(value, example, description):
    """Build a field record; "example" and "description" are included only when non-empty."""
    entry = {"value": value}
    if example:
        entry["example"] = example
    if description:
        entry["description"] = description
    return entry


def parse_xlsx_entities(file_path):
    """
    Parse xlsx file to extract entity dictionaries.

    Rows are read in order until column A equals "END_OF_FILE". A row whose
    column A starts with "entity:" (case-insensitive) opens a new entity and
    takes its description from column F, defaulting to
    "Parsed entity <name>". Each following row with a non-empty column A
    becomes a field when column B contains a checkmark (required) or,
    failing that, column C does (the key is then suffixed with
    " (optional)"); rows with neither are skipped. Entities that end up
    with no fields are not returned.

    Args:
        file_path (str): Path to the xlsx file
    Returns:
        dict: Maps entity name to
            {"description": str, "fields": {field_name: entry}} where each
            entry holds "value" (column D) plus optional "example"
            (column G) and "description" (column F). An empty dict is
            returned, after printing a message, if the file cannot be read.
    """
    try:
        df = pd.read_excel(file_path, engine='openpyxl')

        entities = {}
        current_entity = None
        current_description = ""
        current_fields = {}

        for _, row in df.iterrows():
            name_cell = _cell_text(row, 0)

            # Stop marker: the entity in progress is saved after the loop.
            if name_cell == "END_OF_FILE":
                break

            if name_cell.lower().startswith(_ENTITY_PREFIX):
                # Close the previous entity before opening the next one.
                if current_entity and current_fields:
                    entities[current_entity] = {
                        "description": current_description,
                        "fields": current_fields,
                    }
                current_entity = name_cell[len(_ENTITY_PREFIX):].strip()
                current_fields = {}
                current_description = _cell_text(row, 5) or f"Parsed entity {current_entity}"
                continue

            # Field rows only count inside an entity and need a usable name.
            if not current_entity or not name_cell or name_cell == "nan":
                continue

            is_required = _has_green_checkmark(_cell_text(row, 1))
            is_optional = not is_required and _has_green_checkmark(_cell_text(row, 2))
            if not (is_required or is_optional):
                continue

            field_key = _sanitize_field_name(name_cell)
            if not field_key:
                # Nothing left after sanitising (e.g. "..."): an empty key is unusable.
                continue
            if is_optional:
                field_key = f"{field_key} (optional)"

            current_fields[field_key] = _build_field_entry(
                _cell_text(row, 3),
                _example_text(row, 6),
                _cell_text(row, 5),
            )

        # Save the last entity (also reached via the END_OF_FILE break).
        if current_entity and current_fields:
            entities[current_entity] = {
                "description": current_description,
                "fields": current_fields,
            }
        return entities
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return {}
    except Exception as e:
        print(f"Error reading file: {e}")
        return {}
def _has_reference_field(fields):
"""True if any field declares a reference:<name> type."""
if not isinstance(fields, dict):
return False
for v in fields.values():
if isinstance(v, dict) and isinstance(v.get("value"), str) and v["value"].strip().startswith("reference:"):
return True
return False
def _split_entity_wrapper(wrapper):
    """Return (fields, description) for a parsed entity; a non-dict is treated as bare fields."""
    if isinstance(wrapper, dict):
        return wrapper.get("fields"), wrapper.get("description")
    return wrapper, None


def _create_entities_two_pass(entities, token=None, host=None, has_personal=False, has_community=False):
    """Create dynamic entities in two passes so references between them always
    resolve, even when the spreadsheet order has forward references or cycles.

    Pass 1: create every entity with reference fields downgraded to plain
            strings, so no create depends on another entity existing yet.
    Pass 2: PUT-update each entity that declares references, restoring the real
            reference:<name> types now that every target entity exists on OBP.
            References whose target still does not exist stay as strings.

    Args:
        entities (dict): Parsed entities, name -> {"description", "fields"}.
        token: Token forwarded to every obp_dynamic_api call.
        host: Base URL forwarded to every obp_dynamic_api call.
        has_personal (bool): Forwarded to the entity definition builder.
        has_community (bool): Forwarded to the entity definition builder.
    Returns:
        None. Progress and per-entity failures are printed; a failure on one
        entity does not stop the others.
    """
    from obp_dynamic_api import (
        create_dynamic_entity_from_parsed,
        build_entity_definition_from_parsed,
        update_system_dynamic_entity,
        get_dynamic_entity_id_by_name,
        get_existing_entity_names,
        BUILTIN_REFERENCE_TYPES,
    )
    created_ids = {}

    # ---- Pass 1: create everything with references as strings ----
    print("Pass 1/2: creating entities (references temporarily as strings) ...")
    for entity_name, wrapper in entities.items():
        fields, description = _split_entity_wrapper(wrapper)
        print(f"Processing entity: {entity_name} ...")
        try:
            resp = create_dynamic_entity_from_parsed(
                entity_name,
                fields,
                token=token,
                base_url=host,
                has_personal=has_personal,
                has_community=has_community,
                entity_description=description,
                downgrade_references=True,
            )
            dyn_id = resp.get("dynamicEntityId")
            # Record real ids only: a placeholder would be truthy and would
            # block the lookup-by-name fallback in pass 2.
            if dyn_id:
                created_ids[entity_name] = dyn_id
            print(f" {'Exists' if resp.get('existing') else 'Created'}: {dyn_id or '<no-id>'}")
        except Exception as e:
            print(f" Failed to create entity {entity_name}: {e}")

    # ---- Pass 2: restore reference types now that all targets exist ----
    entities_with_refs = [
        name for name, wrapper in entities.items()
        if _has_reference_field(_split_entity_wrapper(wrapper)[0])
    ]
    if not entities_with_refs:
        return

    print("Pass 2/2: restoring reference types ...")
    existing_names = get_existing_entity_names(token=token, base_url=host)
    allowed_refs = {f"reference:{n}" for n in existing_names} | BUILTIN_REFERENCE_TYPES
    for entity_name in entities_with_refs:
        fields, description = _split_entity_wrapper(entities[entity_name])
        try:
            # The lookup is inside the try so that one failed lookup does not
            # abort the remaining entities.
            dyn_id = created_ids.get(entity_name) or get_dynamic_entity_id_by_name(
                entity_name, token=token, base_url=host
            )
            if not dyn_id:
                print(f" Skipping references for {entity_name}: entity was not created")
                continue
            entity_def = build_entity_definition_from_parsed(
                entity_name,
                fields,
                has_personal=has_personal,
                has_community=has_community,
                entity_description=description,
                allowed_reference_types=allowed_refs,
            )
            update_system_dynamic_entity(dyn_id, entity_def, token=token, base_url=host)
            print(f" Restored references: {entity_name}")
        except Exception as e:
            print(f" Failed to restore references for {entity_name}: {e}")
def _env_to_bool(val):
    """Interpret an environment-style value as a boolean.

    None is False, booleans pass through, and any other value is True only if
    its string form is "1", "true", "yes", "y" or "on" (case-insensitive,
    surrounding whitespace ignored).
    """
    if val is None:
        return False
    if isinstance(val, bool):
        return val
    return str(val).strip().lower() in ("1", "true", "yes", "y", "on")


def _print_parsed_entities(entities):
    """Print every parsed entity and its top-level items to stdout."""
    print(f"\nFound {len(entities)} entities:")
    print("=" * 50)
    for entity_name, entity_dict in entities.items():
        print(f"\nEntity: {entity_name}")
        print("-" * 30)
        if entity_dict:
            for key, value in entity_dict.items():
                print(f" {key}: {value}")
        else:
            print(" (No data)")


def _update_existing_entities(entities, token, host, has_personal, has_community):
    """Update, in place, each parsed entity that already exists on OBP.

    References are validated against the entities that currently exist on OBP.
    Entities with no existing dynamic entity are skipped; a failure on one
    entity is printed and does not stop the others.
    """
    from obp_dynamic_api import (
        get_dynamic_entity_id_by_name,
        build_entity_definition_from_parsed,
        update_system_dynamic_entity,
        get_existing_entity_names,
        BUILTIN_REFERENCE_TYPES,
    )
    existing_names = get_existing_entity_names(token=token, base_url=host)
    allowed_refs = {f"reference:{n}" for n in existing_names} | BUILTIN_REFERENCE_TYPES
    for entity_name, entity_wrapper in entities.items():
        print(f"Processing entity: {entity_name} ...")
        try:
            is_wrapped = isinstance(entity_wrapper, dict)
            entity_description = entity_wrapper.get("description") if is_wrapped else None
            fields = entity_wrapper.get("fields") if is_wrapped else entity_wrapper
            dynamic_id = get_dynamic_entity_id_by_name(entity_name, token=token, base_url=host)
            if not dynamic_id:
                print(f"No existing dynamic entity found for '{entity_name}', skipping update.")
                continue
            # Pass the access flags exactly as the create path does, so an
            # update does not fall back to the builder's defaults.
            entity_def = build_entity_definition_from_parsed(
                entity_name,
                fields,
                has_personal=has_personal,
                has_community=has_community,
                entity_description=entity_description,
                allowed_reference_types=allowed_refs,
            )
            resp = update_system_dynamic_entity(dynamic_id, entity_def, token=token, base_url=host)
            print(f"Updated: {resp.get('dynamicEntityId', dynamic_id)}")
        except Exception as e:
            print(f"Failed to process entity {entity_name}: {e}")


def main():
    """Main function to run the parser.

    Parses the xlsx file named on the command line, prints the entities found,
    and then does exactly one of:
      * --create: create the entities on OBP (two-pass, see
        _create_entities_two_pass);
      * --update: update the entities that already exist on OBP;
      * --save: write the entities to --output without prompting;
      * otherwise: ask interactively whether to save them to a file.

    --create and --update are mutually exclusive and ask for a typed "yes"
    unless --yes is given. The access flags are read from the
    HAS_PERSONAL_ENTITY and HAS_COMMUNITY_ACCESS environment variables.
    """
    parser = argparse.ArgumentParser(description="Parse minimal field matrix and optionally create dynamic entities on OBP")
    parser.add_argument("file", nargs="?", default="min_field_matrix.xlsx", help="Path to the xlsx file")
    parser.add_argument("--create", action="store_true", help="Create parsed entities on OBP (will call management API)")
    parser.add_argument("--update", action="store_true", help="Update existing parsed entities on OBP (will call management API)")
    parser.add_argument("--token", default=None, help="DirectLogin token to use (overrides obp_client.token)")
    parser.add_argument("--host", default=None, help="OBP host to use (overrides obp_client.obp_host)")
    parser.add_argument("--yes", action="store_true", help="If set with --create, skip confirmation prompt")
    parser.add_argument("--save", action="store_true", help="Save parsed entities to --output non-interactively (no prompt)")
    parser.add_argument("--output", default="entities_output.txt", help="Output file used by --save (default: entities_output.txt)")
    args = parser.parse_args()
    file_path = args.file

    # Read access flags from environment (.env or system env)
    has_personal = _env_to_bool(os.getenv("HAS_PERSONAL_ENTITY", "false"))
    has_community = _env_to_bool(os.getenv("HAS_COMMUNITY_ACCESS", "false"))

    if not Path(file_path).exists():
        print(f"File '{file_path}' does not exist.")
        return

    print(f"Parsing file: {file_path}")
    entities = parse_xlsx_entities(file_path)
    if not entities:
        print("No entities found or error occurred.")
        return

    _print_parsed_entities(entities)

    # Optionally create or update entities on OBP management API
    if args.create and args.update:
        print("Cannot use --create and --update together. Choose one.")
        return

    if args.create or args.update:
        if args.create:
            print("--create flag provided: will attempt to create parsed entities on OBP")
        if args.update:
            print("--update flag provided: will attempt to update existing parsed entities on OBP")
        if not args.yes:
            # Name the action actually requested so the prompt is accurate for --update too.
            action = "create" if args.create else "update"
            confirm = input(f"Proceed to {action} entities on OBP? Type 'yes' to continue: ")
            if confirm.strip().lower() != "yes":
                print("Aborted by user.")
                return
        if args.create:
            # Two-pass create so references between entities always resolve,
            # regardless of spreadsheet ordering or reference cycles.
            _create_entities_two_pass(
                entities,
                token=args.token,
                host=args.host,
                has_personal=has_personal,
                has_community=has_community,
            )
            return
        # --update: refresh existing entities in place.
        _update_existing_entities(
            entities,
            token=args.token,
            host=args.host,
            has_personal=has_personal,
            has_community=has_community,
        )
        return

    # Non-interactive save (used by scripts, e.g. recreate_ogcr_entities.sh)
    if args.save:
        _write_entities_file(entities, file_path, args.output)
        return

    # Otherwise, offer to save interactively
    save_option = input("\nSave results to a file? (y/n): ").lower().strip()
    if save_option in ['y', 'yes']:
        output_file = input("Enter output filename (default: entities_output.txt): ").strip()
        if not output_file:
            output_file = "entities_output.txt"
        _write_entities_file(entities, file_path, output_file)
def _write_entities_file(entities, file_path, output_file):
"""Write parsed entities to `output_file` in the `Entity: <name>` format
consumed by delete_ogcr_entities.py."""
try:
with open(output_file, 'w', encoding='utf-8') as f:
f.write(f"Parsed entities from: {file_path}\n")
f.write("=" * 50 + "\n\n")
for entity_name, entity_dict in entities.items():
f.write(f"Entity: {entity_name}\n")
f.write("-" * 30 + "\n")
if entity_dict:
for key, value in entity_dict.items():
f.write(f" {key}: {value}\n")
else:
f.write(" (No data)\n")
f.write("\n")
print(f"Results saved to: {output_file}")
except Exception as e:
print(f"Error saving file: {e}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()