"""Excel bulk-import endpoints: upload, validation, insert and update."""
import json
import os

import pandas
from rest_framework import status
from rest_framework.decorators import api_view
from rest_framework.response import Response

from app.views import get_serializer
# NOTE(review): this star import presumably supplies find, empty, isnumber,
# isdate, formatdate and a dict-aware getattr that shadows the builtin (the
# code below calls getattr on plain dicts) -- confirm against app.basic.
from app.basic import *

# Imported after the star import on purpose so app.basic cannot shadow them.
import logging
from collections import defaultdict
from datetime import datetime

logger = logging.getLogger(__name__)

#=============================================================================
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
static_folder = os.path.join(BASE_DIR, "static")
upload_folder = static_folder + "/files/"
#=============================================================================


def _related_fields(Model):
    """Return ``[{"name", "related"}]`` for every field of ``Model``.

    ``related`` is the class name of the field's related model, or None when
    the field has no related model.
    """
    return [
        {
            "name": field.name,
            "related": (field.related_model.__name__
                        if field.related_model else None),
        }
        for field in Model._meta.get_fields()
    ]


def _timed(message):
    """Append the current local time (HH:MM:SS) to a progress message."""
    return message + ' - ' + datetime.now().strftime("%H:%M:%S")


def check_fields(columns, fields):
    """Return the names in ``fields`` that have no match in ``columns``.

    A name is considered present when ``find(columns, {'name': name})``
    returns something other than None.
    """
    return [name for name in fields
            if find(columns, {'name': name}) is None]


def field_related(name, columns):
    """Annotate API-backed columns with the name of their related model.

    For every column whose ``api`` entry is truthy, ``value['model']`` is set
    to the related model name of the model field called like the column, or
    None when the model has no such field. ``columns`` is mutated in place
    and also returned.
    """
    Model, _ = get_serializer(name)
    fields = _related_fields(Model)
    for key, value in columns.items():
        if getattr(value, 'api'):
            found = find(fields, {"name": key})
            # An unknown column used to raise TypeError on found['related'].
            value['model'] = found['related'] if found is not None else None
    return columns


def check_data(data, fields):
    """Validate and normalise every row against the column rules.

    Empty cells are replaced by None. Rules are checked column by column and
    the function stops at the first failing cell.

    Returns:
        ``('error', [row])`` for the first invalid row (its ``error`` key
        holds the message), otherwise ``(None, data)`` with rows normalised
        in place.
    """
    for key, value in fields.items():
        # Column rules do not depend on the row, so read them once.
        has_api = getattr(value, 'api') is not None
        required = getattr(value, 'empty') == 'no' or has_api
        column_type = getattr(value, 'type')
        for row in data:
            if empty(row[key]):
                row[key] = None
            is_empty = empty(row[key])

            if required and is_empty:
                row['error'] = key + ' không được bỏ trống'
            elif column_type == 'number':
                if not is_empty and not isnumber(row[key]):
                    row['error'] = key + ' không phải là số'
                elif has_api:
                    row[key] = int(row[key])
            elif column_type == 'date':
                if not is_empty and not isdate(row[key]):
                    row['error'] = key + ' không đúng định dạng YYYY-MM-DD'
                else:
                    # NOTE(review): this also runs for empty (None) cells;
                    # confirm formatdate accepts None.
                    row[key] = formatdate(row[key])

            if getattr(row, 'error'):
                return 'error', [row]
    return None, data


def validate_key(name, data, columns):
    """Resolve related values to ids and look up existing records by key.

    Columns with ``key == 'yes'`` form the lookup key; columns with an
    ``api`` entry are resolved against their related model first. For a
    resolved related column the original text is kept in ``row['_' + key]``
    and ``row[key]`` becomes the related id. When key columns exist and no
    related lookup failed, ``row['id']`` is set to the matching record id or
    None.

    Returns:
        ``(error, data, keys)`` where ``error`` is True when at least one
        related value could not be resolved.
    """
    keys = []
    related = []
    for key, value in columns.items():
        if getattr(value, 'key') == 'yes':
            keys.append({"key": key, "value": value})
        if getattr(value, 'api') is not None:
            related.append({"key": key, "value": value})

    Model, _ = get_serializer(name)
    fields = _related_fields(Model)

    error = False
    if related and data:
        # Resolve each related model once instead of once per row.
        lookups = []
        for entry in related:
            value = entry['value']
            field = find(fields, {'name': value['field']})
            related_model, _ = get_serializer(field['related'])
            lookups.append((entry['key'], value['column'], related_model))

        for row in data:
            for key, column, related_model in lookups:
                match = (related_model.objects
                         .filter(**{column: row[key]})
                         .values('id')
                         .first())
                if match:
                    row['_' + key] = str(row[key])
                    row[key] = match['id']
                else:
                    row['error'] = ('Không tồn tại ' + key + ' = '
                                    + str(row[key]))
                    error = True

    if not error and keys:
        for row in data:
            criteria = {}
            for entry in keys:
                key = entry['key']
                value = entry['value']
                if 'api' in value:
                    criteria[value['field']] = row[key]
                else:
                    criteria[key] = row[key]
            match = Model.objects.filter(**criteria).values('id').first()
            row['id'] = match['id'] if match is not None else None
    return error, data, keys


def validate_duplicate(data, keys):
    """Return new rows (no ``id``) that share the same key values.

    Rows are grouped by the stringified values of the key columns; for API
    columns the original text stored under ``'_' + key`` is used. The first
    row of every duplicated group receives an ``error`` message.

    Grouping uses a tuple of values rather than a '#'-joined string, so
    values containing '#' or non-integer numbers no longer corrupt or crash
    the check. Without key columns there is nothing to compare, so the
    result is empty.
    """
    if not keys:
        return []

    columns = [
        '_' + entry['key'] if getattr(entry['value'], 'api') is not None
        else entry['key']
        for entry in keys
    ]
    groups = defaultdict(list)
    for row in data:
        if row.get('id') is None:
            signature = tuple(str(row[column]) for column in columns)
            groups[signature].append(row)

    duplicates = []
    for signature, rows in groups.items():
        if len(rows) > 1:
            rows[0]['error'] = (str(list(signature)) + ' trùng '
                                + str(len(rows)) + ' dòng')
            duplicates += rows
    return duplicates


def bulk_insert(data, fields, logcode, batch_size=500):
    """Insert every row without an ``id`` through the model serializer.

    Rows are validated and saved in batches of ``batch_size``; the counters
    on ``logcode`` are saved after each batch. ``fields`` is unused and kept
    for signature compatibility.

    Returns:
        ``(error, error_count, success_count, count)`` where ``error`` is
        None, the last serializer error set, or the exception that aborted
        the run.
    """
    _, serializer_class = get_serializer(logcode.model)
    # .get(): rows carry no 'id' key when no key columns are configured.
    new_rows = [row for row in data if row.get('id') is None]
    total_records = len(new_rows)
    count = 0
    success_count = 0
    error_count = 0
    error = None
    batch = []
    try:
        for row in new_rows:
            count += 1
            batch.append(row)
            if count % batch_size != 0 and count != total_records:
                continue

            logger.info("bulk_insert: %s/%s rows", count, total_records)
            serializer = serializer_class(data=batch, many=True)
            if serializer.is_valid():
                serializer.save()
                success_count += len(batch)
            else:
                logger.error("bulk_insert: invalid batch: %s",
                             serializer.errors)
                error = serializer.errors
                error_count += len(batch)
            batch = []

            logcode.success_count = success_count
            logcode.error_count = error_count
            logcode.total = count
            logcode.save()
    except Exception as e:
        logger.exception("bulk_insert failed")
        error = e
    return error, error_count, success_count, count


def bulk_update(data, fields, logcode, batch_size=500):
    """Update every row that already has an ``id``.

    Records are loaded and written back with ``bulk_update`` in batches of
    ``batch_size``. API-backed columns are assigned the related instance
    fetched by id. The counters on ``logcode`` are increased on top of their
    previous values and saved after each batch.

    Returns:
        ``(error, error_count, success_count, count)`` for this call only.
    """
    Model, _ = get_serializer(logcode.model)
    prior_success = 0 if logcode.success_count is None else logcode.success_count
    prior_error = 0 if logcode.error_count is None else logcode.error_count
    prior_total = 0 if logcode.total is None else logcode.total

    existing = [row for row in data if row.get('id') is not None]
    total_records = len(existing)
    field_names = list(fields.keys())
    related_models = {}

    def related_model_for(key, value):
        """Resolve (and cache) the related model class of an API column."""
        if key not in related_models:
            related_models[key] = get_serializer(value['model'])[0]
        return related_models[key]

    count = 0
    success_count = 0
    error_count = 0
    error = None
    batch = []
    try:
        for item in existing:
            count += 1
            batch.append(item)
            if count % batch_size != 0 and count != total_records:
                continue

            # Index the batch by id; the first occurrence wins.
            by_id = {}
            for candidate in batch:
                by_id.setdefault(candidate['id'], candidate)

            instances = []
            for record in Model.objects.filter(id__in=list(by_id)):
                source = by_id[record.id]
                for key, value in fields.items():
                    if getattr(value, 'api') is not None:
                        related_model = related_model_for(key, value)
                        setattr(record, key,
                                related_model.objects.get(id=source[key]))
                    else:
                        setattr(record, key, source[key])
                instances.append(record)

            logger.info("bulk_update: %s/%s rows", count, total_records)
            try:
                Model.objects.bulk_update(instances, field_names)
                success_count += len(batch)
            except Exception as e:
                logger.exception("bulk_update: batch failed")
                error_count += len(batch)
                error = e
            batch = []

            logcode.success_count = prior_success + success_count
            logcode.error_count = prior_error + error_count
            logcode.total = prior_total + count
            logcode.save()
    except Exception as e:
        logger.exception("bulk_update failed")
        error = e
    return error, error_count, success_count, count


def open_file(model, filename, fields, isfull=False):
    """Read an uploaded Excel file and validate it against ``fields``.

    Returns:
        * ``{'error': 'lack-fields', 'fields': [...]}`` when columns are
          missing from the sheet;
        * ``{'error': 'data-error', 'data': [row], 'fields': [...]}`` when a
          cell fails validation;
        * ``{'data': rows, 'fields': [...], 'total': n}`` on success, with
          ``data`` limited to the first 1000 rows unless ``isfull`` is set;
        * None when reading or parsing raised (the exception is logged).
    """
    try:
        # NOTE(review): this reads from the relative "Upload/" directory
        # while bulk_upload/read_excel use upload_folder -- confirm both
        # point at the same location.
        df = pandas.read_excel("Upload/" + filename, dtype=str,
                               na_filter=False)
        data = json.loads(
            df.to_json(path_or_buf=None, orient='table', force_ascii=False))
        schema_fields = data['schema']['fields']

        lack = check_fields(schema_fields, fields)
        if lack:
            return {'error': 'lack-fields', 'fields': lack}

        fields = field_related(model, fields)
        result, rows = check_data(data['data'], fields)
        if result is not None:
            return {'error': 'data-error', 'data': rows,
                    'fields': schema_fields}
        return {'data': rows if isfull else rows[:1000],
                'fields': schema_fields,
                'total': len(rows)}
    except Exception:
        logger.exception("open_file failed for %s", filename)
        return None


def _open_file_note(result):
    """Summarise a failed ``open_file`` result as a short note string."""
    if result is None:
        return 'read-error'
    if result.get('error') == 'lack-fields':
        return 'lack-fields: ' + ', '.join(str(n) for n in result['fields'])
    messages = [row.get('error') for row in result.get('data', [])
                if row.get('error') is not None]
    return ', '.join(messages[:10])


def perform_import(code):
    """Run the whole import identified by the ``Import_Log`` code.

    Steps: read the file, resolve keys, reject duplicates, insert new rows,
    update existing rows. Each step appends an entry to the log's
    ``progress`` list and saves the log.

    Returns:
        The serialized log after the update step (whether or not the update
        itself reported an error), or None when the log does not exist or an
        earlier step failed.
    """
    Model, serializer_class = get_serializer('Import_Log')
    logcode = Model.objects.filter(code=code).first()
    if logcode is None:
        logger.error("perform_import: no Import_Log with code %r", code)
        return None

    progress = [{'code': 1,
                 'message': _timed('Bắt đầu kiểm tra bản ghi mới / tồn tại'),
                 'type': 'waiting'}]
    logcode.status = 'executing'
    logcode.progress = progress
    logcode.save()

    rs = open_file(logcode.model, logcode.file, logcode.fields, True)
    if rs is None or 'error' in rs:
        # The file could not be read or failed validation; without this
        # guard rs['data'] raised and the log stayed in 'executing'.
        logcode.note = _open_file_note(rs)
        logcode.status = 'error'
        progress.append({'code': 2,
                         'message': 'Dữ liệu có lỗi: ' + logcode.note,
                         'type': 'error'})
        logcode.save()
        return None

    error, rows, keys = validate_key(logcode.model, rs['data'],
                                     logcode.fields)
    if error:
        messages = [getattr(o, 'error') for o in rows
                    if getattr(o, 'error') is not None][:10]
        logcode.note = ', '.join(messages)
        logcode.status = 'error'
        progress.append({'code': 2,
                         'message': 'Dữ liệu có lỗi: ' + logcode.note,
                         'type': 'error'})
        logcode.save()
        return None

    # inform
    progress.append({'code': 3,
                     'message': _timed('Hoàn tất kiểm tra dữ liệu mới / tồn tại'),
                     'type': 'success'})
    progress.append({'code': 3.1,
                     'message': _timed('Bắt đầu kiểm tra dữ liệu trùng lặp'),
                     'type': 'waiting'})
    logcode.save()

    # check duplicate
    duplicates = validate_duplicate(rows, keys)
    if duplicates:
        messages = [getattr(o, 'error') for o in duplicates
                    if getattr(o, 'error') is not None][:10]
        logcode.note = ', '.join(messages)
        logcode.status = 'error'
        progress.append({'code': 3.2,
                         'message': _timed('Dữ liệu bị trùng lặp, cần loại bỏ trùng lặp'),
                         'type': 'error'})
        logcode.save()
        return None

    progress.append({'code': 3,
                     'message': _timed('Hoàn tất dữ liệu trùng lặp'),
                     'type': 'success'})
    progress.append({'code': 4,
                     'message': _timed('Bắt đầu insert dữ liệu'),
                     'type': 'waiting'})
    logcode.save()

    # insert
    error, error_count, success_count, count = bulk_insert(
        rows, logcode.fields, logcode)
    if error is not None:
        # Store text: error may be an exception or a serializer error set.
        logcode.note = str(error)
        logcode.status = 'error'
        progress.append({'code': 5,
                         'message': _timed('Insert dữ liệu có lỗi: '
                                           + str(error_count)),
                         'type': 'error'})
        logcode.save()
        return None

    progress.append({'code': 6,
                     'message': _timed('Insert dữ liệu thành công: '
                                       + str(success_count)),
                     'type': 'success'})
    progress.append({'code': 7,
                     'message': _timed('Bắt đầu cập nhật dữ liệu'),
                     'type': 'waiting'})
    logcode.save()

    # update
    error, error_count, success_count, count = bulk_update(
        rows, logcode.fields, logcode)
    if error is not None:
        logcode.note = str(error)
        logcode.status = 'error'
        progress.append({'code': 8,
                         'message': _timed('Cập nhật dữ liệu có lỗi: '
                                           + str(error_count)),
                         'type': 'error'})
    else:
        progress.append({'code': 9,
                         'message': _timed('Cập nhật dữ liệu thành công: '
                                           + str(success_count)),
                         'type': 'success'})
        logcode.status = 'success'

    # save
    logcode.save()
    return serializer_class(logcode).data


@api_view(['POST'])
def bulk_import(request):
    """Run the import whose log code is given in ``request.data['logcode']``."""
    perform_import(request.data['logcode'])
    # NOTE(review): 200 is passed as the response *body* here (the HTTP
    # status defaults to 200 as well); kept so existing clients see the same
    # payload.
    return Response(status.HTTP_200_OK)


@api_view(['POST'])
def bulk_upload(request):
    """Store an uploaded Excel file and return its validation result.

    Expects ``file``, ``name``, ``fields`` (JSON text) and ``model`` in the
    request data. Responds with whatever ``open_file`` returns, 400 for a
    malformed request, or 500 with an ``error`` message when the file cannot
    be written.
    """
    try:
        file = request.data['file']
        # Strip directory components so a client-supplied name cannot
        # escape upload_folder.
        filename = os.path.basename(request.data['name'])
        fields = json.loads(request.data['fields'])
        model = request.data['model']
    except (KeyError, ValueError) as e:
        return Response({'error': str(e)},
                        status=status.HTTP_400_BAD_REQUEST)
    if not filename:
        return Response({'error': 'invalid file name'},
                        status=status.HTTP_400_BAD_REQUEST)

    try:
        with open(upload_folder + filename, 'wb+') as destination:
            for chunk in file.chunks():
                destination.write(chunk)
    except OSError as e:
        # IOError is an alias of OSError in Python 3, so one handler covers
        # both; the exception object itself is not serializable.
        return Response({'error': str(e)},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    return Response(open_file(model, filename, fields))

#=============================================================================
@api_view(['GET'])
def read_excel(request):
    """Return the uploaded Excel file named by ``?name=`` as table JSON text."""
    filename = os.path.basename(request.query_params.get('name') or '')
    if not filename:
        return Response({'error': 'name is required'},
                        status=status.HTTP_400_BAD_REQUEST)
    try:
        df = pandas.read_excel(upload_folder + filename, dtype=str,
                               na_filter=False)
    except FileNotFoundError as e:
        return Response({'error': str(e)},
                        status=status.HTTP_404_NOT_FOUND)
    except OSError as e:
        return Response({'error': str(e)},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    # The payload stays a JSON *string*, exactly as before.
    val = df.to_json(path_or_buf=None, orient='table', force_ascii=False)
    return Response(val)

#=============================================================================
@api_view(['GET'])
def model_fields(request, name):
    """List the fields of model ``name`` with type, null/unique and relation.

    Responds with 400 when ``get_serializer`` yields no model.
    """
    Model, _ = get_serializer(name)
    if Model is None:
        return Response(status=status.HTTP_400_BAD_REQUEST)

    described = [
        {
            "id": position,
            "name": field.name,
            "datatype": field.get_internal_type(),
            "null": field.null,
            "unique": field.unique if hasattr(field, 'unique') else False,
            "related": (field.related_model.__name__
                        if field.related_model else None),
        }
        for position, field in enumerate(Model._meta.get_fields(), start=1)
    ]
    return Response(described)

#=============================================================================
@api_view(['POST'])
def find_key(request):
    """Resolve related values and existing ids for client-supplied rows.

    Request data: ``name`` (model), ``keys`` and ``related`` (lists of
    ``{"key", "value"}`` column descriptors) and ``data`` (rows). Related
    ids are written to ``row[value['field']]``; when every related value
    resolves, ``row['id']`` is set to the matching record id or None.

    Responds with ``{"error": bool, "data": rows}``, or 400 when a related
    column names a field that is not a relation of the model.
    """
    Model, _ = get_serializer(request.data['name'])
    fields = _related_fields(Model)
    keys = request.data['keys']
    related = request.data['related']
    data = request.data['data']

    error = False
    if related and data:
        # Resolve every related model once, before touching the rows.
        lookups = []
        for entry in related:
            value = entry['value']
            field = next(
                (f for f in fields if f['name'] == value['field']), None)
            if field is None or field['related'] is None:
                # Previously an unpack TypeError / AttributeError (HTTP 500).
                return Response(
                    {'error': True,
                     'message': 'Unknown related field: '
                                + str(value['field'])},
                    status=status.HTTP_400_BAD_REQUEST)
            related_model, _ = get_serializer(field['related'])
            lookups.append(
                (entry['key'], value['column'], value['field'],
                 related_model))

        for row in data:
            for key, column, target, related_model in lookups:
                match = (related_model.objects
                         .filter(**{column: row[key]})
                         .values('id')
                         .first())
                if match:
                    row[target] = match['id']
                else:
                    row['error'] = ('Không tồn tại ' + key + ' = '
                                    + str(row[key]))
                    error = True

    if not error and keys:
        for row in data:
            criteria = {}
            for entry in keys:
                key = entry['key']
                value = entry['value']
                db_field = value.get('field') or value.get('column') or key
                criteria[db_field] = row[key]
            match = Model.objects.filter(**criteria).values('id').first()
            row['id'] = match['id'] if match is not None else None
    return Response({"error": error, "data": data})