-
Notifications
You must be signed in to change notification settings - Fork 0
/
recursos.js
122 lines (100 loc) · 3.98 KB
/
recursos.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
const deepCopy = require("deepcopy");
const coordinatesHelper = require('./coordinatesHelper');
/**
 * Entry point of the line segmentation.
 *
 * GCP Vision groups several nearby words into lines, but it will not group
 * words that are too far apart. This function merges the word annotations
 * back into lines, builds a combined bounding polygon for each of them (via
 * coordinatesHelper) and then joins the lines that were matched to each other.
 *
 * @param {Object} data - Vision response. `data.textAnnotations[0].description`
 *     is read as the newline-separated full text; the remaining entries are
 *     treated as the individual word annotations.
 * @returns {string[]} The segmented lines. An empty array is returned when the
 *     response carries no text annotations at all.
 */
function initLineSegmentation(data) {
    // No text detected: there is nothing to segment, and reading
    // textAnnotations[0] below would throw.
    const annotations = data == null ? undefined : data.textAnnotations;
    if (!annotations || annotations.length === 0) {
        return [];
    }

    const yMax = coordinatesHelper.getYMax(data);
    const inverted = coordinatesHelper.invertAxis(data, yMax);

    // The first annotation holds the full text, one automatically identified
    // line per '\n'-separated entry.
    let lines = inverted.textAnnotations[0].description.split('\n');
    // Copy of the GCP Vision annotations, so the merge step can mutate freely.
    let rawText = deepCopy(inverted.textAnnotations);

    // Reverse both so they can be consumed LIFO with pop();
    // array.shift() would cost O(n) per call.
    lines = lines.reverse();
    rawText = rawText.reverse();
    // After reversing, the last element is the former zeroth element (the
    // summary of the whole text); drop it so only words remain.
    rawText.pop();

    const mergedArray = getMergedLines(lines, rawText);

    coordinatesHelper.getBoundingPolygon(mergedArray);
    coordinatesHelper.combineBoundingPolygon(mergedArray);

    // This does the line segmentation based on the bounding boxes.
    return constructLineWithBoundingPolygon(mergedArray);
}
// TODO implement the line ordering for multiple words
/**
 * Builds the final list of text lines from the merged line elements.
 *
 * Elements flagged as `matched` are skipped. An element with an empty `match`
 * list contributes its own description; any other element contributes the
 * text returned by arrangeWordsInOrder for it.
 *
 * @param {Object[]} mergedArray - merged line elements carrying `matched`,
 *     `match` and `description`.
 * @returns {string[]} The resulting lines, in the order of `mergedArray`.
 */
function constructLineWithBoundingPolygon(mergedArray) {
    const segmentedLines = [];
    for (const [position, entry] of mergedArray.entries()) {
        if (entry['matched']) {
            continue;
        }
        const text = entry['match'].length === 0
            ? entry.description
            : arrangeWordsInOrder(mergedArray, position);
        segmentedLines.push(text);
    }
    return segmentedLines;
}
/**
 * Rebuilds every text line from the individual word annotations.
 *
 * Both arrays are consumed from their end with pop(), so the caller passes
 * them reversed; both are emptied/mutated by this call. Words are popped one
 * by one and stripped from the current line text until nothing but whitespace
 * is left, at which point the line is complete.
 *
 * @param {string[]} lines - line texts in reverse order.
 * @param {Object[]} rawText - word annotations in reverse order; each one
 *     needs `description` and `boundingPoly.vertices`.
 * @returns {Object[]} One element per completed line: the annotation of the
 *     line's first word, with `description` replaced by the full line text and
 *     `vertices[1]` / `vertices[2]` taken from the line's last word. A line is
 *     dropped when the words run out before it is complete.
 */
function getMergedLines(lines, rawText) {
    const mergedArray = [];
    while (lines.length !== 0) {
        const lineText = lines.pop();
        // A blank line contains no words. Skip it without popping a word,
        // otherwise the following lines would be matched against the wrong
        // words.
        if (lineText.trim() === '') {
            continue;
        }

        let remaining = lineText;
        let mergedElement;
        while (true) {
            const wordElement = rawText.pop();
            if (wordElement === undefined) {
                break;
            }
            const word = wordElement.description;
            // Drop everything up to and including this word.
            // NOTE(review): when the word is not found, indexOf yields -1 and
            // only word.length - 1 characters are dropped; kept as in the
            // original - confirm whether a stricter policy is wanted.
            remaining = remaining.substring(remaining.indexOf(word) + word.length);

            if (mergedElement === undefined) {
                // The first word of the line supplies the starting coordinates.
                mergedElement = wordElement;
            }
            // Trailing whitespace must not keep the line open, or it would
            // swallow the first word of the next line.
            if (remaining.trim() === '') {
                // The last word of the line supplies the ending coordinates.
                mergedElement.description = lineText;
                mergedElement.boundingPoly.vertices[1] = wordElement.boundingPoly.vertices[1];
                mergedElement.boundingPoly.vertices[2] = wordElement.boundingPoly.vertices[2];
                mergedArray.push(mergedElement);
                break;
            }
        }
    }
    return mergedArray;
}
/**
 * Joins a line with all the lines matched to it, ordered from left to right.
 *
 * The order is decided by the `x` of the first bounding-polygon vertex of each
 * element. On a tie (or when the coordinates cannot be compared) the matched
 * lines come before the line itself, in the order they appear in `match`.
 *
 * @param {Object[]} mergedArray - merged line elements.
 * @param {number} k - index of the line whose `match` entries are joined;
 *     each entry's `matchLineNum` is an index into `mergedArray`.
 * @returns {string} The descriptions joined with single spaces, or an empty
 *     string when the line has no matches.
 */
function arrangeWordsInOrder(mergedArray, k) {
    const matches = mergedArray[k]['match'];
    if (matches.length === 0) {
        return '';
    }

    const leftX = (element) => element.boundingPoly.vertices[0].x;

    // Matched lines first, the line itself last: this is the tie-break order.
    const segments = matches.map((entry) => mergedArray[entry['matchLineNum']]);
    segments.push(mergedArray[k]);

    return segments
        .map((element, position) => ({ element, position }))
        // Explicit positional tie-break so the result does not depend on the
        // engine's sort being stable.
        .sort((a, b) => (leftX(a.element) - leftX(b.element)) || (a.position - b.position))
        .map(({ element }) => element.description)
        .join(' ');
}
// Public API of the module: only the segmentation entry point is exposed.
// `exports` is kept pointing at the same object as `module.exports`.
var exports = module.exports = {
    initLineSegmentation: function (data) {
        return initLineSegmentation(data);
    }
};