Initial commit (Clean history)
This commit is contained in:
408
app/importdata.py
Executable file
408
app/importdata.py
Executable file
@@ -0,0 +1,408 @@
|
||||
import os, pandas, json
|
||||
from rest_framework import status
|
||||
from rest_framework.decorators import api_view
|
||||
from rest_framework.response import Response
|
||||
from app.views import get_serializer
|
||||
from app.basic import *
|
||||
from datetime import datetime
|
||||
|
||||
#=============================================================================
|
||||
# Filesystem layout: uploaded spreadsheets are stored under <project>/static/files/.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
static_folder = os.path.join(BASE_DIR, "static")
# Join with a trailing "" so the path keeps its trailing separator and
# callers can keep concatenating filenames directly (upload_folder + name).
# The original `static_folder + "/files/"` mixed separators on Windows.
upload_folder = os.path.join(static_folder, "files", "")
|
||||
|
||||
#=============================================================================
|
||||
def check_fields(columns, fields):
    """Return the required field names that have no matching column.

    columns -- list of column descriptors (dicts carrying a 'name' key),
               typically data['schema']['fields'] from pandas' to_json.
    fields  -- iterable of required field names (the import config keys).

    Returns a (possibly empty) list of names missing from *columns*.
    """
    # The original appended via `lack.append(name) if found == None else True`,
    # abusing a conditional expression for side effects; a comprehension
    # states the intent directly.  Matching on the 'name' key inlines the
    # single-key probe previously delegated to find(columns, {'name': name}).
    return [name for name in fields
            if not any(col.get('name') == name for col in columns)]
|
||||
|
||||
|
||||
def field_related(name, columns):
    """Annotate FK column configs with the name of the model they target.

    name    -- model name resolved through get_serializer.
    columns -- dict of column-name -> column-config; configs whose 'api'
               flag is set get a 'model' entry added (the related model's
               class name) so bulk_update can later resolve ids to instances.
    Returns the mutated *columns* dict.
    """
    Model, serializer_class = get_serializer(name)
    fields = []
    for field in Model._meta.get_fields():
        # Pair every model field with its related model's class name
        # (None for non-relational fields).
        obj = {"name": field.name, "related": field.related_model.__name__ if field.related_model else None }
        fields.append(obj)

    for key, value in columns.items():
        # NOTE(review): getattr() is applied to what looks like a plain dict;
        # presumably app.basic (star-imported) shadows the builtin with a
        # dict-aware helper, otherwise this raises AttributeError — confirm.
        if getattr(value, 'api'):
            found = find(fields, {"name": key})
            value['model'] = found['related']
    return columns
|
||||
|
||||
|
||||
def check_data(data, fields):
    """Normalize and validate every row against the column configs.

    Mutates rows in place: blank cells become None, numeric FK cells are
    cast to int, date cells are reformatted.  Short-circuits on the first
    invalid row, returning ('error', [row]); otherwise returns (None, data).

    NOTE(review): the getattr() calls below operate on dicts — presumably a
    dict-aware helper star-imported from app.basic shadows the builtin;
    the builtin getattr would raise AttributeError here — confirm.
    """
    for key, value in fields.items():
        for row in data:
            # Treat empty cells uniformly as None before validating.
            row[key] = None if empty(row[key]) == True else row[key]
            # Required columns (empty == 'no') and FK columns (api set)
            # must be filled.
            if (getattr(value, 'empty') == 'no' or getattr(value, 'api') != None) and empty(row[key]) == True:
                row['error'] = key + ' không được bỏ trống'

            elif getattr(value, 'type') == 'number':
                if empty(row[key]) == False and isnumber(row[key]) == False:
                    row['error'] = key + ' không phải là số'
                elif getattr(value, 'api') != None:
                    # FK cells are later used as integer ids.
                    row[key] = int(row[key])

            elif getattr(value, 'type') == 'date':
                if empty(row[key]) == False and isdate(row[key]) == False:
                    row['error'] = key + ' không đúng định dạng YYYY-MM-DD'
                else:
                    row[key] = formatdate(row[key])

            # Stop at the first offending row so the caller can report it.
            if getattr(row, 'error'):
                return 'error', [row]
    return None, data
|
||||
|
||||
|
||||
def validate_key(name, data, columns):
    """Resolve FK columns to database ids and detect existing records.

    name    -- target model name.
    data    -- list of row dicts (already normalized by check_data).
    columns -- dict of column configs; 'key' == 'yes' marks natural-key
               columns, a set 'api' marks FK columns.

    Side effects on rows: FK cells are replaced by the referenced id (the
    original text is kept under '_<key>' for duplicate detection) and
    row['id'] is set to the matching record's id, or None for new rows.
    Returns (error_flag, data, keys).

    NOTE(review): getattr() on dict configs — presumably a dict-aware
    helper star-imported from app.basic; confirm.
    """
    keys = []      # natural-key column configs
    related = []   # FK column configs
    for key, value in columns.items():
        if getattr(value, 'key') == 'yes':
            keys.append({"key": key, "value": value})
        if getattr(value, 'api') != None:
            related.append({"key": key, "value": value})

    Model, serializer_class = get_serializer(name)
    fields = []
    for field in Model._meta.get_fields():
        # Map each model field to its related model's class name, if any.
        obj = {"name": field.name, "related": field.related_model.__name__ if field.related_model else None }
        fields.append(obj)

    error = False
    if len(related)>0:
        for row in data:
            for obj in related:
                key = obj['key']
                value = obj['value']
                # NOTE(review): rebinding `name` shadows the model-name
                # parameter from here on — confirm intended.
                name = value['column']
                f = {name: row[key]}
                field = find(fields, {'name': value['field']})
                Model1, serializer_class = get_serializer(field['related'])
                # One lookup per FK cell (N+1 query pattern).
                obj = Model1.objects.filter(**f).values('id').first()
                if obj:
                    # Stash the human-readable value for validate_duplicate,
                    # then replace the cell with the FK id.
                    row['_' + key] = str(row[key])
                    row[key] = obj['id']
                else:
                    row['error'] = 'Không tồn tại ' + key + ' = ' + str(row[key])
                    error = True

    if error == False and len(keys)>0:
        for row in data:
            f = {}
            for obj in keys:
                key = obj['key']
                value = obj['value']
                if 'api' in value:
                    # FK key columns filter on the mapped DB field name.
                    name = value['field']
                    f[name] = row[key]
                else:
                    f[key] = row[key]
            # Existing record (by natural key) -> its id; new record -> None.
            obj = Model.objects.filter(**f).values('id').first()
            row['id'] = obj['id'] if obj !=None else None
    return error, data, keys
|
||||
|
||||
|
||||
def validate_duplicate(data, keys):
    """Return the new rows (id == None) whose natural key repeats.

    data -- rows previously processed by validate_key.
    keys -- the natural-key column descriptors returned by validate_key.

    Builds a '#'-joined composite key per new row, counts occurrences,
    then collects and flags the rows belonging to any key seen more than
    once.  Returns the (possibly empty) list of duplicated rows.

    NOTE(review): `getattr(ele, attr)` on a dict and `filter(arr, f)` with
    (list, dict) arguments rely on helpers presumably star-imported from
    app.basic — the builtins would not accept these calls; confirm.
    """
    arr = [o for o in data if o['id'] == None]
    ele = {}   # composite-key string -> occurrence count
    for row in arr:
        attr = ''
        for o in keys:
            # FK key cells were rewritten to ids; their original text was
            # stashed under '_<key>' by validate_key — compare on that.
            key = '_' + o['key'] if getattr(o['value'], 'api') != None else o['key']
            attr += str(row[key]) + '#'
        attr = attr[0: len(attr)-1]   # drop the trailing '#'
        counter = 0 if getattr(ele, attr) == None else getattr(ele, attr)
        ele[attr] = counter + 1
    array = []
    for key, value in ele.items():
        if value>1:
            # Rebuild the per-column filter from the composite key string.
            arr1 = key.split('#')
            f = {}
            count = 0
            for o in keys:
                key = '_' + o['key'] if getattr(o['value'], 'api') != None else o['key']
                f[key] = int(arr1[count]) if getattr(o['value'], 'type')=='number' else arr1[count]
                count += 1
            arr2 = filter(arr, f)
            if len(arr2) >0:
                # Mark only the first duplicate row with the summary message.
                arr2[0]['error'] = str(arr1) + ' trùng ' + str(len(arr2)) + ' dòng'
            array += arr2
    return array
|
||||
|
||||
|
||||
def bulk_insert(data, fields, logcode):
    """Insert the new rows (id == None) in batches of 500 via the serializer.

    data    -- full row list; only rows with id == None are inserted.
    fields  -- column configs (unused here beyond the signature).
    logcode -- Import_Log instance; running counters are persisted on it
               after each flushed batch so a client can poll progress.

    Returns (error, error_count, success_count, count) where error is the
    last batch's serializer.errors or a raised exception, or None.
    """
    Model, serializer_class = get_serializer(logcode.model)
    count = 0
    batch = []
    success_count = 0
    error_count = 0
    arr = [o for o in data if o['id'] == None]
    total_records = len(arr)
    error = None
    try:
        for row in arr:
            count += 1
            batch.append(row)
            # Flush every 500 rows, and once more at the tail of the list.
            if count % 500 == 0 or count == total_records:
                print(count)
                serializer = serializer_class(data = batch, many=True)
                if serializer.is_valid():
                    serializer.save()
                    success_count += len(batch)
                else:
                    print(serializer.errors)
                    # NOTE(review): only the last failing batch's errors
                    # survive in `error` — earlier ones are overwritten.
                    error = serializer.errors
                    error_count += len(batch)
                batch = []
                # Persist running progress on the Import_Log row.
                logcode.success_count = success_count
                logcode.error_count = error_count
                logcode.total = count
                logcode.save()

    except Exception as e:
        print('error', e)
        error = e
    return error, error_count, success_count, count
|
||||
|
||||
|
||||
def bulk_update(data, fields, logcode):
    """Update the existing rows (id != None) in batches of 500.

    data    -- full row list; only rows with id != None are updated.
    fields  -- column configs; FK columns (api set) are resolved to model
               instances before assignment.
    logcode -- Import_Log instance; counters continue from the totals that
               bulk_insert already stored on it and are saved per batch.

    Returns (error, error_count, success_count, count); error is the last
    failing batch's exception or None.
    """
    Model, serializer_class = get_serializer(logcode.model)
    count = 0
    batch = []
    array = []   # model instances prepared for the next bulk_update call
    success_count = 0
    error_count = 0
    error = None
    # Continue the counters from bulk_insert's persisted totals.
    succ_count = 0 if logcode.success_count == None else logcode.success_count
    err_count = 0 if logcode.error_count == None else logcode.error_count
    total = 0 if logcode.total == None else logcode.total
    arr = [o for o in data if o['id'] != None]
    total_records = len(arr)
    try:
        for obj in arr:
            count += 1
            batch.append(obj)
            # Flush every 500 rows, and once more at the tail of the list.
            if count % 500 == 0 or count == total_records:
                # Load the DB instances of the current batch and copy the
                # imported values onto them field by field.
                qs = Model.objects.filter(id__in= [o['id'] for o in batch])
                for row in qs:
                    found = find(batch, {'id': row.id})
                    for key, value in fields.items():
                        # NOTE(review): getattr() on a dict config —
                        # presumably a dict-aware helper from app.basic.
                        if getattr(value, 'api') != None:
                            # FK column: assign an instance, not a bare id.
                            # NOTE(review): one objects.get per row+field
                            # (N+1 query pattern).
                            Model1, serializer_class1 = get_serializer(value['model'])
                            setattr(row, key, Model1.objects.get(id=found[key]))
                        else:
                            setattr(row, key, found[key])
                    array.append(row)
                print(count)
                try:
                    Model.objects.bulk_update(array, list(fields.keys()))
                    success_count += len(batch)
                except Exception as e:
                    error_count += len(batch)
                    error = e
                batch = []
                array = []
                # Persist cumulative progress on the Import_Log row.
                logcode.success_count = succ_count + success_count
                logcode.error_count = err_count + error_count
                logcode.total = total + count
                logcode.save()
    except Exception as e:
        print('error', e)
        error = e
    return error, error_count, success_count, count
|
||||
|
||||
|
||||
def open_file(model, filename, fields, isfull=False):
    """Read an uploaded Excel file and validate it against *fields*.

    model    -- model name used to resolve FK columns (see field_related).
    filename -- file name inside the upload folder.
    fields   -- dict of column configs (the parsed Import_Log.fields).
    isfull   -- True: return every row; False: preview of the first 1000.

    Returns one of:
      {'error': 'lack-fields', 'fields': [...]}             missing columns
      {'error': 'data-error', 'data': [...], 'fields': ...} invalid cells
      {'error': 'exception', 'message': str}                unexpected failure
      {'data': [...], 'fields': [...], 'total': n}          success
    """
    try:
        # Bug fix: bulk_upload saves the file under upload_folder and
        # read_excel reads from there too; the hard-coded "Upload/" prefix
        # used here pointed at a different directory.
        df = pandas.read_excel(upload_folder + filename, dtype=str, na_filter=False)
        # 'table' orient includes a schema block listing the column names.
        data = json.loads(df.to_json(path_or_buf=None, orient='table', force_ascii=False))
        lack = check_fields(data['schema']['fields'], fields)
        if len(lack) > 0:
            return {'error': 'lack-fields', 'fields': lack}
        fields = field_related(model, fields)
        result, rows = check_data(data['data'], fields)
        if result != None:
            return {'error': 'data-error', 'data': rows, 'fields': data['schema']['fields']}
        return {'data': rows if isfull else rows[:1000], 'fields': data['schema']['fields'], 'total': len(rows)}
    except Exception as e:
        # Bug fix: the original only printed the exception and fell through
        # to an implicit None return, which made callers crash on rs['data'];
        # surface the failure as an explicit error dict instead.
        print(e)
        return {'error': 'exception', 'message': str(e)}
|
||||
|
||||
|
||||
def perform_import(code):
    """Run the full import pipeline for the Import_Log row with *code*.

    Stages: re-open the uploaded file (full read) -> validate_key (FK
    resolution, new/existing detection) -> validate_duplicate -> bulk_insert
    new rows -> bulk_update existing rows.  Status, progress and note are
    persisted on the Import_Log row after every stage so a client can poll.
    Returns the serialized Import_Log on success; returns None early when a
    stage fails.
    """
    Model, serializer_class = get_serializer('Import_Log')
    logcode = Model.objects.filter(code = code).first()
    progress = [{'code': 1, 'message': 'Bắt đầu kiểm tra bản ghi mới / tồn tại' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'waiting'}]
    logcode.status = 'executing'
    logcode.progress = progress
    logcode.save()
    # Re-read the whole file (isfull=True) — the preview step only kept 1000 rows.
    rs = open_file(logcode.model, logcode.file, logcode.fields, True)
    # NOTE(review): rs['data'] is read unconditionally — an error result from
    # open_file (no 'data' key) would raise here; confirm intended.
    error, rows, keys = validate_key(logcode.model, rs['data'], logcode.fields)
    if error == True:
        # Keep at most the first 10 row-level error messages in the note.
        # NOTE(review): getattr() on dict rows — presumably a dict-aware
        # helper star-imported from app.basic; the builtin would raise.
        arr = [getattr(o, 'error') for o in rows if getattr(o, 'error') != None][:10]
        logcode.note = ', '.join(arr)
        logcode.status = 'error'
        progress.append({'code': 2, 'message': 'Dữ liệu có lỗi: ' + logcode.note, 'type': 'error'})
        logcode.save()
        return

    # inform
    progress.append({'code': 3, 'message': 'Hoàn tất kiểm tra dữ liệu mới / tồn tại' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'success'})
    progress.append({'code': 3.1, 'message': 'Bắt đầu kiểm tra dữ liệu trùng lặp' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'waiting'})
    logcode.save()

    # check duplicate
    arr = validate_duplicate(rows, keys)
    if len(arr) >0:
        arr = [getattr(o, 'error') for o in arr if getattr(o, 'error') != None][:10]
        logcode.note = ', '.join(arr)
        logcode.status = 'error'
        progress.append({'code': 3.2, 'message': 'Dữ liệu bị trùng lặp, cần loại bỏ trùng lặp' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'error'})
        logcode.save()
        return

    progress.append({'code': 3, 'message': 'Hoàn tất dữ liệu trùng lặp' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'success'})
    progress.append({'code': 4, 'message': 'Bắt đầu insert dữ liệu' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'waiting'})
    logcode.save()

    # insert
    error, error_count, success_count, count = bulk_insert(rows, logcode.fields, logcode)
    if error != None:
        logcode.note = error
        logcode.status = 'error'
        progress.append({'code': 5, 'message': 'Insert dữ liệu có lỗi: ' + str(error_count) + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'error'})
        logcode.save()
        return

    progress.append({'code': 6, 'message': 'Insert dữ liệu thành công: ' + str(success_count) + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'success'})
    progress.append({'code': 7, 'message': 'Bắt đầu cập nhật dữ liệu' + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'waiting'})
    logcode.save()
    error, error_count, success_count, count = bulk_update(rows, logcode.fields, logcode)
    if error != None:
        logcode.note = error
        logcode.status = 'error'
        progress.append({'code': 8, 'message': 'Cập nhật dữ liệu có lỗi: ' + str(error_count) + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'error'})
    else:
        progress.append({'code': 9, 'message': 'Cập nhật dữ liệu thành công: ' + str(success_count) + ' - ' + datetime.now().strftime("%H:%M:%S"), 'type': 'success'})
        logcode.status = 'success'
    # save
    logcode.save()

    # update after import success
    return serializer_class(logcode).data
|
||||
|
||||
|
||||
@api_view(['POST'])
def bulk_import(request):
    """POST endpoint: run the import for an Import_Log code.

    Expects {'logcode': <code>} in the request body.  The work is done
    synchronously by perform_import; progress is reported through the
    Import_Log row itself, so only a bare 200 is returned here.
    """
    perform_import(request.data['logcode'])
    # Bug fix: the status code must be passed as the keyword argument —
    # Response(status.HTTP_200_OK) sent the integer 200 as the response body.
    return Response(status=status.HTTP_200_OK)
|
||||
|
||||
|
||||
@api_view(['POST'])
def bulk_upload(request):
    """POST endpoint: save an uploaded Excel file and return a preview.

    Request data: 'file' (the upload), 'name' (target file name),
    'fields' (JSON-encoded column configs) and 'model' (target model name).
    The file is written under upload_folder, then validated/previewed by
    open_file and that result is returned.
    """
    # The @api_view(['POST']) decorator already rejects other methods, so
    # the original `if request.method == 'POST'` guard was redundant.
    file = request.data['file']
    filename = request.data['name']
    fields = json.loads(request.data['fields'])
    model = request.data['model']
    try:
        # Stream the upload to disk chunk by chunk.
        with open(upload_folder + filename, 'wb+') as destination:
            for chunk in file.chunks():
                destination.write(chunk)
        result = open_file(model, filename, fields)
        return Response(result)
    except OSError as e:
        # Bug fix: IOError is an alias of OSError in Python 3, so the
        # original second `except OSError` branch was unreachable; and an
        # exception object is not JSON-serializable, so return its message.
        return Response(str(e))
|
||||
|
||||
#=============================================================================
|
||||
@api_view(['GET'])
def read_excel(request):
    """GET endpoint: read an uploaded Excel file and return it as JSON.

    Query params: 'name' — file name inside upload_folder.
    NOTE: the body is the to_json *string* (clients receive JSON-encoded
    JSON); kept as-is for backward compatibility with existing callers.
    """
    try:
        filename = request.query_params['name']
        df = pandas.read_excel(upload_folder + filename, dtype=str, na_filter=False)
        val = df.to_json(path_or_buf=None, orient='table', force_ascii=False)
        return Response(val)
    except OSError as e:
        # Bug fix: IOError is an alias of OSError in Python 3, so the
        # original second `except OSError` branch was unreachable; and an
        # exception object is not JSON-serializable, so return its message.
        return Response(str(e))
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@api_view(['GET'])
def model_fields(request, name):
    """GET endpoint: describe the fields of model *name*.

    Returns a list of {id, name, datatype, null, unique, related} dicts,
    or 400 when no model/serializer is registered under that name.
    """
    Model, serializer_class = get_serializer(name)
    # Idiom fix: identity comparison with None (`is None`, not `== None`).
    if Model is None:
        return Response(status=status.HTTP_400_BAD_REQUEST)

    arr = []
    # enumerate(start=1) replaces the original hand-rolled counter.
    for count, field in enumerate(Model._meta.get_fields(), start=1):
        arr.append({
            "id": count,
            "name": field.name,
            "datatype": field.get_internal_type(),
            "null": field.null,
            # Reverse relations carry no 'unique' attribute, hence hasattr.
            "unique": field.unique if hasattr(field, 'unique') else False,
            "related": field.related_model.__name__ if field.related_model else None,
        })
    return Response(arr)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@api_view(['POST'])
def find_key(request):
    """POST endpoint: resolve FK columns and detect existing records.

    Client-facing counterpart of validate_key.  Request data carries the
    model 'name', the rows ('data'), the natural-key column descriptors
    ('keys') and the FK column descriptors ('related').  Rows are annotated
    in place with resolved FK ids and an 'id' (existing record) or None
    (new record), then returned along with an error flag.
    """
    Model, serializer_class = get_serializer(request.data['name'])
    fields = []
    for field in Model._meta.get_fields():
        # Map each model field to its related model's class name, if any.
        obj = {"name": field.name, "related": field.related_model.__name__ if field.related_model else None }
        fields.append(obj)
    keys = request.data['keys']
    related = request.data['related']
    data = request.data['data']
    def find_field(name):
        # Resolve a field name to its descriptor plus the related model
        # class.  NOTE(review): the local `Model` shadows the outer one, and
        # an unknown name falls through to an implicit None, which would
        # break the tuple unpacking at the call site — confirm intended.
        for field in fields:
            if field['name'] == name:
                print('name', name, field['related'])

                Model, serializerclass = get_serializer(field['related'])
                return field, Model

    error = False
    if len(related)>0:
        for row in data:
            for obj in related:
                key = obj['key']
                value = obj['value']
                name = value['column']
                f = {name: row[key]}
                field, Model1 = find_field(value['field'])
                # One lookup per FK cell (N+1 query pattern).
                obj = Model1.objects.filter(**f).values('id').first()
                if obj:
                    row[value['field']] = obj['id']
                else:
                    row['error'] = 'Không tồn tại ' + key + ' = ' + str(row[key])
                    error = True

    if error == False and len(keys) > 0:
        for row in data:
            f = {}
            for obj in keys:
                key = obj['key']
                value = obj['value']

                # Prefer the mapped DB field, then the column, then the key.
                db_field = value.get('field') or value.get('column') or key

                f[db_field] = row[key]

            # Existing record (by natural key) -> its id; new -> None.
            obj = Model.objects.filter(**f).values('id').first()
            row['id'] = obj['id'] if obj != None else None

    return Response({"error": error, "data": data})
|
||||
Reference in New Issue
Block a user