Files
api/app/importdata.py
2025-12-30 11:27:14 +07:00

409 lines
16 KiB
Python
Executable File

import os, pandas, json
from rest_framework import status
from rest_framework.decorators import api_view
from rest_framework.response import Response
from app.views import get_serializer
from app.basic import *
from datetime import datetime
#=============================================================================
# Directory two levels above this module's absolute path.
BASE_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)
# "static" directory located directly under BASE_DIR.
static_folder = os.path.join(BASE_DIR, "static")
# Upload destination; the trailing slash lets callers append a bare file name.
upload_folder = f"{static_folder}/files/"
#=============================================================================
def check_fields(columns, fields):
    """Return the names in ``fields`` that have no matching entry in ``columns``.

    Args:
        columns: Collection of column descriptors; each is looked up by its
            ``name`` entry.
        fields: Iterable of required column names (iterating a dict yields
            its keys).

    Returns:
        list: The names for which the lookup found nothing, in iteration order.

    Note:
        ``find`` is not defined in this file; presumably it comes from the
        star import of ``app.basic`` and returns ``None`` when nothing matches
        -- confirm against that module.
    """
    return [name for name in fields if find(columns, {'name': name}) is None]
def field_related(name, columns):
    """Annotate API-backed column specs with the name of their related model.

    Args:
        name: Model name passed to ``get_serializer``.
        columns: Mapping of column name -> column spec. Specs are mutated in
            place.

    Returns:
        The same ``columns`` mapping, where every spec with a truthy ``api``
        entry has gained a ``model`` entry holding the related model's class
        name (or ``None`` when the model field has no related model).

    Note:
        ``getattr`` is called here with a dict and two arguments, so it is
        presumably the project helper from ``app.basic`` shadowing the
        builtin -- confirm. ``find`` is presumably from the same module.
    """
    model_cls, _serializer = get_serializer(name)
    # Describe each model field by name and, for relations, the target model name.
    field_info = [
        {
            "name": model_field.name,
            "related": model_field.related_model.__name__ if model_field.related_model else None,
        }
        for model_field in model_cls._meta.get_fields()
    ]
    for column_name, spec in columns.items():
        if not getattr(spec, 'api'):
            continue
        match = find(field_info, {"name": column_name})
        spec['model'] = match['related']
    return columns
def _check_cell(row, column, spec):
    """Normalise and validate ``row[column]`` in place according to ``spec``.

    On a validation failure the message is stored under ``row['error']``.
    ``empty``, ``isnumber``, ``isdate``, ``formatdate`` and the two-argument
    ``getattr`` are presumably helpers from ``app.basic`` -- confirm.
    """
    # Blank cells are normalised to None before any rule is applied.
    if empty(row[column]) == True:
        row[column] = None
    blank = empty(row[column])
    api = getattr(spec, 'api')
    required = getattr(spec, 'empty') == 'no' or api is not None
    kind = getattr(spec, 'type')

    if required and blank == True:
        # Message: "<column> must not be left empty".
        row['error'] = column + ' không được bỏ trống'
        return
    if kind == 'number':
        if blank == False and isnumber(row[column]) == False:
            # Message: "<column> is not a number".
            row['error'] = column + ' không phải là số'
        elif api is not None:
            # API-backed numeric columns are converted to int.
            row[column] = int(row[column])
        return
    if kind == 'date':
        if blank == False and isdate(row[column]) == False:
            # Message: "<column> is not in YYYY-MM-DD format".
            row['error'] = column + ' không đúng định dạng YYYY-MM-DD'
        else:
            row[column] = formatdate(row[column])


def check_data(data, fields):
    """Validate every configured column of every row, stopping at the first error.

    Args:
        data: List of row dicts; rows are modified in place.
        fields: Mapping of column name -> column spec.

    Returns:
        tuple: ``('error', [row])`` for the first row that carries a truthy
        ``error`` entry after a cell check, otherwise ``(None, data)``.
    """
    for column, spec in fields.items():
        for row in data:
            _check_cell(row, column, spec)
            if getattr(row, 'error'):
                return 'error', [row]
    return None, data
def validate_key(name, data, columns):
    """Resolve related-object references and existing record ids for each row.

    Step 1: for every column whose spec has an ``api`` entry, look up the
    related record whose ``spec['column']`` equals the row value. When found,
    the original value is kept as a string under ``'_' + column`` and the
    column is replaced by the related record's id. When not found, the row
    gets an ``error`` message ("<column> = <value> does not exist").

    Step 2: only if step 1 produced no error and key columns exist, look up
    an existing record of the target model by the key columns and store its
    id (or ``None``) under ``row['id']``.

    Args:
        name: Target model name passed to ``get_serializer``.
        data: List of row dicts; modified in place.
        columns: Mapping of column name -> column spec.

    Returns:
        tuple: ``(error, data, keys)`` where ``error`` is a bool and ``keys``
        is the list of ``{"key": column, "value": spec}`` key-column entries.

    Note:
        The two-argument ``getattr`` and ``find`` are presumably helpers from
        ``app.basic`` -- confirm.
    """
    key_columns = []
    related_columns = []
    for column, spec in columns.items():
        if getattr(spec, 'key') == 'yes':
            key_columns.append({"key": column, "value": spec})
        if getattr(spec, 'api') != None:
            related_columns.append({"key": column, "value": spec})

    Model, _ = get_serializer(name)
    field_info = [
        {
            "name": model_field.name,
            "related": model_field.related_model.__name__ if model_field.related_model else None,
        }
        for model_field in Model._meta.get_fields()
    ]

    error = False
    # Related model per column, resolved on first use: the answer does not
    # depend on the row, so it must not be recomputed for every row.
    related_models = {}
    if related_columns:
        for row in data:
            for entry in related_columns:
                column = entry['key']
                spec = entry['value']
                lookup = {spec['column']: row[column]}
                related_model = related_models.get(column)
                if related_model is None:
                    field = find(field_info, {'name': spec['field']})
                    related_model, _ = get_serializer(field['related'])
                    related_models[column] = related_model
                match = related_model.objects.filter(**lookup).values('id').first()
                if match:
                    # Keep the human-readable value; it is needed later for
                    # duplicate detection.
                    row['_' + column] = str(row[column])
                    row[column] = match['id']
                else:
                    row['error'] = 'Không tồn tại ' + column + ' = ' + str(row[column])
                    error = True

    if not error and key_columns:
        for row in data:
            lookup = {}
            for entry in key_columns:
                column = entry['key']
                spec = entry['value']
                if 'api' in spec:
                    # Related key columns are filtered through the model field name.
                    lookup[spec['field']] = row[column]
                else:
                    lookup[column] = row[column]
            existing = Model.objects.filter(**lookup).values('id').first()
            row['id'] = existing['id'] if existing is not None else None
    return error, data, key_columns
def validate_duplicate(data, keys):
    """Return the new rows that share the same key-column values.

    Only rows whose ``id`` is ``None`` (not yet in the database) are
    considered. Rows are grouped by the string form of their key values; for
    key columns whose spec has an ``api`` entry the original value stored
    under ``'_' + column`` is used.

    Rows are grouped directly instead of being re-selected from a
    ``'#'``-joined string, so duplicates are reported regardless of whether
    the stored value is text or a number, and values containing ``#`` are
    handled.

    Args:
        data: List of row dicts carrying an ``id`` entry.
        keys: List of ``{"key": column, "value": spec}`` entries; ``spec``
            is expected to be a dict.

    Returns:
        list: All rows belonging to a group of two or more, in first-seen
        group order. The first row of each group gets an ``error`` message
        ("<values> duplicated in <n> rows").
    """
    new_rows = [row for row in data if row['id'] is None]
    columns = [
        '_' + entry['key'] if entry['value'].get('api') is not None else entry['key']
        for entry in keys
    ]

    groups = {}
    for row in new_rows:
        signature = tuple(str(row[column]) for column in columns)
        groups.setdefault(signature, []).append(row)

    duplicates = []
    for signature, rows in groups.items():
        if len(rows) > 1:
            rows[0]['error'] = str(list(signature)) + ' trùng ' + str(len(rows)) + ' dòng'
            duplicates += rows
    return duplicates
def bulk_insert(data, fields, logcode):
    """Create the new rows (``id`` is ``None``) in batches of 500 via the serializer.

    After every batch the running counters are written to ``logcode`` and
    saved. A batch that fails serializer validation is counted as errors and
    its ``serializer.errors`` become the returned error; processing continues
    with the next batch. Any raised exception stops processing and becomes
    the returned error.

    Args:
        data: List of row dicts carrying an ``id`` entry.
        fields: Unused by this function.
        logcode: Log record providing ``model`` and receiving
            ``success_count``, ``error_count`` and ``total``.

    Returns:
        tuple: ``(error, error_count, success_count, count)`` where ``count``
        is the number of rows handed to batches so far.
    """
    Model, serializer_class = get_serializer(logcode.model)
    pending = [row for row in data if row['id'] is None]
    total_records = len(pending)
    processed = 0
    success_count = 0
    error_count = 0
    error = None
    try:
        for start in range(0, total_records, 500):
            batch = pending[start:start + 500]
            processed = start + len(batch)
            print(processed)
            serializer = serializer_class(data=batch, many=True)
            if serializer.is_valid():
                serializer.save()
                success_count += len(batch)
            else:
                print(serializer.errors)
                error = serializer.errors
                error_count += len(batch)
            # Persist progress after each batch.
            logcode.success_count = success_count
            logcode.error_count = error_count
            logcode.total = processed
            logcode.save()
    except Exception as e:
        print('error', e)
        error = e
    return error, error_count, success_count, processed
def bulk_update(data, fields, logcode):
    """Update existing rows (``id`` is not ``None``) in batches of 500.

    For each batch the matching model instances are loaded, every configured
    column is copied onto the instance (API-backed columns are replaced by
    the related instance fetched by id), and the batch is written with
    ``Model.objects.bulk_update``. Counters on ``logcode`` are incremented on
    top of the values already stored there and saved after every batch.

    Args:
        data: List of row dicts carrying an ``id`` entry.
        fields: Mapping of column name -> column spec. API-backed specs must
            carry a ``model`` entry.
        logcode: Log record providing ``model`` and the running counters.

    Returns:
        tuple: ``(error, error_count, success_count, count)`` for this call
        only (not including the counters previously stored on ``logcode``).

    Note:
        ``find`` and the two-argument ``getattr`` are presumably helpers from
        ``app.basic`` -- confirm.
    """
    Model, _ = get_serializer(logcode.model)
    previous_success = 0 if logcode.success_count is None else logcode.success_count
    previous_errors = 0 if logcode.error_count is None else logcode.error_count
    previous_total = 0 if logcode.total is None else logcode.total

    existing = [row for row in data if row['id'] is not None]
    total_records = len(existing)
    count = 0
    success_count = 0
    error_count = 0
    error = None
    # Related model class per column, resolved on first use: it does not
    # depend on the row, so it is looked up once instead of once per row.
    related_models = {}
    try:
        for start in range(0, total_records, 500):
            batch = existing[start:start + 500]
            count = start + len(batch)
            instances = []
            for instance in Model.objects.filter(id__in=[row['id'] for row in batch]):
                source = find(batch, {'id': instance.id})
                for key, spec in fields.items():
                    if getattr(spec, 'api') != None:
                        related_model = related_models.get(key)
                        if related_model is None:
                            related_model, _ = get_serializer(spec['model'])
                            related_models[key] = related_model
                        setattr(instance, key, related_model.objects.get(id=source[key]))
                    else:
                        setattr(instance, key, source[key])
                instances.append(instance)
            print(count)
            try:
                Model.objects.bulk_update(instances, list(fields.keys()))
                success_count += len(batch)
            except Exception as e:
                # A failed batch is counted and remembered; later batches still run.
                error_count += len(batch)
                error = e
            logcode.success_count = previous_success + success_count
            logcode.error_count = previous_errors + error_count
            logcode.total = previous_total + count
            logcode.save()
    except Exception as e:
        print('error', e)
        error = e
    return error, error_count, success_count, count
def open_file(model, filename, fields, isfull=False):
    """Read an Excel file and validate its columns and cell values.

    Args:
        model: Model name forwarded to ``field_related``.
        filename: File name appended to the relative ``"Upload/"`` directory.
        fields: Mapping of column name -> column spec.
        isfull: When true, return all rows; otherwise only the first 1000.

    Returns:
        dict | None:
            * ``{'error': 'lack-fields', 'fields': [...]}`` when required
              columns are missing;
            * ``{'error': 'data-error', 'data': [...], 'fields': [...]}`` when
              ``check_data`` reports an error;
            * ``{'data': [...], 'fields': [...], 'total': n}`` on success;
            * ``None`` when any exception is raised (it is only printed).
    """
    try:
        # NOTE(review): this reads from the relative "Upload/" directory while
        # bulk_upload/read_excel use upload_folder -- confirm both resolve to
        # the same location in deployment.
        frame = pandas.read_excel("Upload/" + filename, dtype=str, na_filter=False)
        table = json.loads(
            frame.to_json(path_or_buf=None, orient='table', force_ascii=False)
        )
        schema_fields = table['schema']['fields']

        missing = check_fields(schema_fields, fields)
        if len(missing) > 0:
            return {'error': 'lack-fields', 'fields': missing}

        fields = field_related(model, fields)
        outcome, rows = check_data(table['data'], fields)
        if outcome is not None:
            return {'error': 'data-error', 'data': rows, 'fields': schema_fields}

        visible_rows = rows if isfull else rows[:1000]
        return {'data': visible_rows, 'fields': schema_fields, 'total': len(rows)}
    except Exception as e:
        print(e)
def _import_timestamp():
    """Return the current local time as ``HH:MM:SS`` for progress messages."""
    return datetime.now().strftime("%H:%M:%S")


def _open_file_failure(result):
    """Return a note describing why ``open_file`` failed, or ``None`` on success."""
    if result is None:
        # open_file returns None when reading/validating raised an exception.
        return 'Không đọc được file dữ liệu'
    if 'error' not in result:
        return None
    if result['error'] == 'lack-fields':
        return 'Thiếu cột: ' + ', '.join(str(name) for name in result.get('fields', []))
    messages = [
        str(row['error'])
        for row in result.get('data', [])
        if row.get('error') is not None
    ][:10]
    return ', '.join(messages) if messages else str(result['error'])


def perform_import(code):
    """Run the whole import for the ``Import_Log`` record identified by ``code``.

    Steps: read and validate the file, resolve related/existing records,
    reject duplicates among new rows, insert new rows, update existing rows.
    Each step appends an entry to the log's ``progress`` list and saves the
    log; on failure the log gets ``status = 'error'`` and a ``note``.

    Args:
        code: Value of the ``code`` field of the log record to process.

    Returns:
        The serialized log record once the update step has run (even if that
        step reported an error); ``None`` when the log record does not exist
        or an earlier step failed.
    """
    Model, serializer_class = get_serializer('Import_Log')
    logcode = Model.objects.filter(code=code).first()
    if logcode is None:
        # No log record: nothing to run and nowhere to record a failure.
        return None

    # logcode.progress references this same list; the flow relies on later
    # appends being persisted by the following save() calls.
    progress = [{
        'code': 1,
        'message': 'Bắt đầu kiểm tra bản ghi mới / tồn tại' + ' - ' + _import_timestamp(),
        'type': 'waiting',
    }]
    logcode.status = 'executing'
    logcode.progress = progress
    logcode.save()

    rs = open_file(logcode.model, logcode.file, logcode.fields, True)
    failure = _open_file_failure(rs)
    if failure is not None:
        # Without this guard a failed read crashed on rs['data'] (or imported
        # only the single erroneous row) and left the log stuck in 'executing'.
        logcode.note = failure
        logcode.status = 'error'
        progress.append({'code': 2, 'message': 'Dữ liệu có lỗi: ' + logcode.note, 'type': 'error'})
        logcode.save()
        return

    error, rows, keys = validate_key(logcode.model, rs['data'], logcode.fields)
    if error == True:
        messages = [row['error'] for row in rows if row.get('error') is not None][:10]
        logcode.note = ', '.join(messages)
        logcode.status = 'error'
        progress.append({'code': 2, 'message': 'Dữ liệu có lỗi: ' + logcode.note, 'type': 'error'})
        logcode.save()
        return

    # inform
    progress.append({'code': 3, 'message': 'Hoàn tất kiểm tra dữ liệu mới / tồn tại' + ' - ' + _import_timestamp(), 'type': 'success'})
    progress.append({'code': 3.1, 'message': 'Bắt đầu kiểm tra dữ liệu trùng lặp' + ' - ' + _import_timestamp(), 'type': 'waiting'})
    logcode.save()

    # check duplicate
    duplicates = validate_duplicate(rows, keys)
    if len(duplicates) > 0:
        messages = [row['error'] for row in duplicates if row.get('error') is not None][:10]
        logcode.note = ', '.join(messages)
        logcode.status = 'error'
        progress.append({'code': 3.2, 'message': 'Dữ liệu bị trùng lặp, cần loại bỏ trùng lặp' + ' - ' + _import_timestamp(), 'type': 'error'})
        logcode.save()
        return

    progress.append({'code': 3, 'message': 'Hoàn tất dữ liệu trùng lặp' + ' - ' + _import_timestamp(), 'type': 'success'})
    progress.append({'code': 4, 'message': 'Bắt đầu insert dữ liệu' + ' - ' + _import_timestamp(), 'type': 'waiting'})
    logcode.save()

    # insert
    error, error_count, success_count, count = bulk_insert(rows, logcode.fields, logcode)
    if error is not None:
        # error may be an exception or serializer.errors; store its text form.
        logcode.note = str(error)
        logcode.status = 'error'
        progress.append({'code': 5, 'message': 'Insert dữ liệu có lỗi: ' + str(error_count) + ' - ' + _import_timestamp(), 'type': 'error'})
        logcode.save()
        return

    progress.append({'code': 6, 'message': 'Insert dữ liệu thành công: ' + str(success_count) + ' - ' + _import_timestamp(), 'type': 'success'})
    progress.append({'code': 7, 'message': 'Bắt đầu cập nhật dữ liệu' + ' - ' + _import_timestamp(), 'type': 'waiting'})
    logcode.save()

    # update
    error, error_count, success_count, count = bulk_update(rows, logcode.fields, logcode)
    if error is not None:
        logcode.note = str(error)
        logcode.status = 'error'
        progress.append({'code': 8, 'message': 'Cập nhật dữ liệu có lỗi: ' + str(error_count) + ' - ' + _import_timestamp(), 'type': 'error'})
    else:
        progress.append({'code': 9, 'message': 'Cập nhật dữ liệu thành công: ' + str(success_count) + ' - ' + _import_timestamp(), 'type': 'success'})
        logcode.status = 'success'
    # save
    logcode.save()
    return serializer_class(logcode).data
@api_view(['POST'])
def bulk_import(request):
    """Run the import for the log record whose code is posted as ``logcode``.

    Responds with HTTP 400 when ``logcode`` is missing from the request body
    instead of failing with an unhandled ``KeyError``.
    """
    code = request.data.get('logcode')
    if code is None:
        return Response({'error': 'logcode is required'}, status=status.HTTP_400_BAD_REQUEST)
    perform_import(code)
    # NOTE(review): the status constant is passed positionally, so it becomes
    # the response *body*; kept unchanged in case clients read it.
    return Response(status.HTTP_200_OK)
@api_view(['POST'])
def bulk_upload(request):
    """Store an uploaded file in ``upload_folder`` and return its validation result.

    Request body entries: ``file`` (uploaded file object), ``name`` (target
    file name), ``fields`` (JSON-encoded column specs) and ``model``.

    Returns:
        Response: the result of ``open_file`` on success; HTTP 400 for an
        empty file name; HTTP 500 with ``{'error': message}`` when writing
        the file fails.
    """
    file = request.data['file']
    # The name comes from the client: keep only the last path component so it
    # cannot point outside upload_folder (e.g. "../../settings.py").
    filename = os.path.basename(request.data['name'])
    fields = json.loads(request.data['fields'])
    model = request.data['model']
    if not filename:
        return Response({'error': 'invalid file name'}, status=status.HTTP_400_BAD_REQUEST)
    try:
        with open(upload_folder + filename, 'wb+') as destination:
            for chunk in file.chunks():
                destination.write(chunk)
        result = open_file(model, filename, fields)
        return Response(result)
    except OSError as e:
        # IOError is an alias of OSError on Python 3, so one handler suffices.
        # The exception object itself is not serialisable; return its message.
        return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
#=============================================================================
@api_view(['GET'])
def read_excel(request):
    """Return the content of an uploaded Excel file as a table-oriented JSON string.

    The file is named by the ``name`` query parameter and read from
    ``upload_folder``.

    Returns:
        Response: the JSON string produced by ``DataFrame.to_json`` on
        success; HTTP 500 with ``{'error': message}`` when the file cannot
        be read.
    """
    try:
        # The name comes from the client: keep only the last path component so
        # it cannot address files outside upload_folder.
        filename = os.path.basename(request.query_params['name'])
        df = pandas.read_excel(upload_folder + filename, dtype=str, na_filter=False)
        val = df.to_json(path_or_buf=None, orient='table', force_ascii=False)
        return Response(val)
    except OSError as e:
        # IOError is an alias of OSError on Python 3, so one handler suffices.
        # The exception object itself is not serialisable; return its message.
        return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
#=============================================================================
@api_view(['GET'])
def model_fields(request, name):
    """Describe every field of the model called ``name``.

    Returns:
        Response: HTTP 400 when ``get_serializer`` yields no model; otherwise
        a list with one dict per field holding a 1-based ``id``, ``name``,
        ``datatype`` (internal type), ``null``, ``unique`` (``False`` when
        the field has no such attribute) and ``related`` (related model class
        name or ``None``).
    """
    Model, serializer_class = get_serializer(name)
    if Model is None:
        return Response(status=status.HTTP_400_BAD_REQUEST)
    described = [
        {
            "id": position,
            "name": field.name,
            "datatype": field.get_internal_type(),
            "null": field.null,
            "unique": field.unique if hasattr(field, 'unique') else False,
            "related": field.related_model.__name__ if field.related_model else None,
        }
        for position, field in enumerate(Model._meta.get_fields(), start=1)
    ]
    return Response(described)
#=============================================================================
@api_view(['POST'])
def find_key(request):
    """Resolve related-record ids and existing-record ids for posted rows.

    Request body entries: ``name`` (target model name), ``keys`` and
    ``related`` (lists of ``{"key": column, "value": spec}``) and ``data``
    (list of row dicts, modified in place and returned).

    For each ``related`` entry the related record whose ``spec['column']``
    equals the row value is looked up; its id is stored under
    ``row[spec['field']]``, or the row gets an ``error`` message when it (or
    the related model itself) cannot be found. If no error occurred, each row
    is then matched against the target model by the key columns and its
    ``id`` is set to the existing record's id or ``None``.

    Returns:
        Response: ``{"error": bool, "data": rows}``; HTTP 400 when the target
        model is unknown (same convention as ``model_fields``).
    """
    Model, serializer_class = get_serializer(request.data['name'])
    if Model is None:
        return Response(status=status.HTTP_400_BAD_REQUEST)

    # Model field name -> related model class name (None for plain fields).
    related_names = {
        field.name: (field.related_model.__name__ if field.related_model else None)
        for field in Model._meta.get_fields()
    }
    keys = request.data['keys']
    related = request.data['related']
    data = request.data['data']

    resolved_models = {}

    def related_model_for(field_name):
        """Return the model class behind ``field_name`` (resolved once), or None."""
        if field_name not in resolved_models:
            model = None
            related_name = related_names.get(field_name)
            if related_name is not None:
                model, _ = get_serializer(related_name)
            resolved_models[field_name] = model
        return resolved_models[field_name]

    error = False
    if related:
        for row in data:
            for entry in related:
                key = entry['key']
                spec = entry['value']
                related_model = related_model_for(spec['field'])
                match = None
                if related_model is not None:
                    lookup = {spec['column']: row[key]}
                    match = related_model.objects.filter(**lookup).values('id').first()
                if match:
                    row[spec['field']] = match['id']
                else:
                    # Also covers an unknown/non-relational field, which used
                    # to fail with an unpacking TypeError.
                    row['error'] = 'Không tồn tại ' + key + ' = ' + str(row[key])
                    error = True

    if not error and keys:
        for row in data:
            lookup = {}
            for entry in keys:
                key = entry['key']
                spec = entry['value']
                db_field = spec.get('field') or spec.get('column') or key
                lookup[db_field] = row[key]
            existing = Model.objects.filter(**lookup).values('id').first()
            row['id'] = existing['id'] if existing is not None else None
    return Response({"error": error, "data": data})