-
Notifications
You must be signed in to change notification settings - Fork 5
/
csv2sql
executable file
·90 lines (74 loc) · 4.24 KB
/
csv2sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/sh
# Pierre Chevalier
# Ce script fabrique une table tmp_ dans le schéma tmp_imports de
# la bd postgresql GLL_BD_NAME dans l'hôte GLL_BD_HOST, à partir
# d'un fichier .csv de départ.
# Tous les champs sont créés de manière générique en varchar
# pour le moment; TODO faudrait itérer pour détecter les numériques,
# mais bof: faire ça plus tard, en python par exemple, ou en Red, Rebol.
# $1: fichier .csv
# MARCHE PAS => ce qui marche pas est indénté avec une tabulation
# et commenté.
#$2: option verbeuse -v
#$1 et $2 sont intervertibles
# This script creates a temporary tmp_ table in the tmp_imports schema
# of the GLL_BD_NAME postgresql database served by host GLL_BD_HOST,
# from a .csv file.
# All field types are generically varchar, for the time being;
# TODO it should iterate to determine numeric fields, but bof;
# do that later, in Red, and/or Rebol, and/or Python.
# $1: input .csv file
# DOES NOT WORK => what doesn't work is indented with a tab and
# commented out
#$2: option verbeuse -v
#$1 et $2 sont intervertibles
# echo "=============================================================="
fichier_orig=$1
#option=$2
# if [fichier_orig==-v]; then option=$1; fichier_orig=$2; fi
# echo "*** DEBUG: option: $option fichier_orig: $fichier_orig ***"
# if [option=="-v"]; then echo "Verbose mode"; fi
# assainissement du nom (léger, à améliorer; faire appel à sanity.pl? (dépendance...))
# filename sanitysation (light, to be improved; call sanity.pl ? (dependancy...))
fichier=`echo $fichier_orig | sed -e 's/" "/_/g' | sed -e 's/-/_/g' | tr "[:upper:]" "[:lower:]"`
if [ ! $fichier_orig==$fichier ]; then mv fichier_orig fichier; fi
table=`echo tmp_$fichier | sed -e 's/\.csv//g' `
schema="tmp_imports"
# if [option=="-v"]; then
# # echo "--------------------------------------------------------------"
# echo "Import of data from $fichier into $schema.$table table from $GLL_BD_NAME database."
# echo "Input file lines count: `wc -l $fichier`"
# fi
# echo "--------------------------------------------------------------"
cmd_sql="DROP TABLE IF EXISTS $schema.$table;"
# echo $cmd_sql
echo $cmd_sql | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME
# echo "--------------------------------------------------------------"
cmd_sql="CREATE TABLE $schema.$table (`head -1 $fichier | sed -e 's/"//g' | sed -e 's/,\ /,/g' | sed -e 's/ /_/g' | sed -e 's/,/ varchar, /g'` varchar);"
# echo $cmd_sql
echo $cmd_sql | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME
# echo "--------------------------------------------------------------"
cmd_sql="DELETE FROM $schema.$table;"
# echo $cmd_sql
echo $cmd_sql | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME
# echo "--------------------------------------------------------------"
# echo "Import ..."
tail -n +2 $fichier | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME -c "\COPY $schema.$table FROM stdin WITH CSV"
# echo " done."
# echo "--------------------------------------------------------------"
# if [option=="-v"]; then
# echo "Record count of table $schema.$table: `echo "SELECT count(*) FROM $schema.$table;" | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME | head -3 | tail -1`"
# echo "--------------------------------------------------------------"
# echo "Overview of table $schema.$table (2 first records):"
# echo "SELECT * FROM $schema.$table LIMIT 2;" | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME
# echo "... (and 2 last records):"
# echo "SELECT * FROM $schema.$table OFFSET (SELECT count(*) FROM $schema.$table) -2 ;" | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME | tail -n +3
# echo "--------------------------------------------------------------"
# echo "If you saw your data, it is ok in $schema.$table."
# fi
echo "Data from $fichier_orig should be in $schema.$table. To browse it:"
echo "From the shell:"
echo "echo \"SELECT * FROM $schema.$table;\" | psql -X -h $GLL_BD_HOST -p $GLL_BD_PORT -U $GLL_BD_USER -d $GLL_BD_NAME | less"
echo "From a psql or equivalent:"
echo "SELECT * FROM $schema.$table;"
#echo "=============================================================="