blob: 39feda2484988023bea19725fa8454876cb240b3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#!/bin/sh
#
# Parse stagit-style repos
#
# Usage: <script> <urlbase> <destdir>
#
#   <urlbase>  URL of the index page that lists the repositories
#   <destdir>  local directory under which the extracted README files
#              are stored
#
# External tools used by this script: torify, curl, xml2tsv.
#
# Exits with status 1 (after printing the usage line on stderr) when fewer
# than two arguments are given.
#
if [ "$#" -lt 2 ]; then
  # "$0" is quoted so a script path containing whitespace is not split into
  # several printf arguments (which would repeat the format string).
  printf 'Usage: %s <urlbase> <destdir>\n' "$0" >&2
  exit 1
fi
# Print the local mirror path "<scheme>/<rest>" for the URL given as $1.
# Both parts are cut at the FIRST "://", so a later "://" inside the URL
# (e.g. in a query string) stays part of <rest>.
url_dirbase() {
  printf '%s/%s\n' "${1%%://*}" "${1#*://}"
}

# Not referenced anywhere else in the visible script; kept for compatibility.
FIN="/dev/stdin"
URLBASE="$1"
DEST="$2"
# Path component inserted between a repository URL and a README file name.
SUBPATH="/file"
# Download command. It is expanded UNQUOTED where it is used so that it
# splits into the command name and its options.
CURL="torify curl -Ls "
# Everything before the first "://" of the base URL.
PROTO=${URLBASE%%://*}
# Directory (relative to DEST) that mirrors the base URL.
DIRBASE=$(url_dirbase "$URLBASE")
printf 'proto: %s\n' "$PROTO"
printf 'dirbase: %s\n' "$DIRBASE"
# Candidate README page names, tried in this order for every repository.
READMES="README.html README.txt.html README.md.html readme.html readme.txt.html readme.md.html"
# Build the repository list from the index page at URLBASE:
#   1. download the page and convert it with xml2tsv,
#   2. keep only the rows for anchors inside the index table,
#   3. take the last two whitespace-separated fields of each row,
#   4. drop the "href=" prefix and join the two fields with "|".
# $CURL is left unquoted on purpose so it splits into command + options.
repos=$(
  $CURL "${URLBASE}" |
    xml2tsv |
    grep "/html/body/div/table/tbody/tr/td/a" |
    awk '{print $(NF-1), $NF}' |
    sed -E -e 's/href=//g' -e 's/ /\|/'
)
# Undo the backslash escaping of the text read on stdin, line by line:
#   \n  -> removed (every input row is already a line of its own)
#   \t  -> TAB
#   \\  -> a single backslash
# Any other backslash sequence is passed through unchanged.
# The input is scanned left to right in ONE pass so that an escaped
# backslash followed by "n" or "t" (e.g. "C:\\new") is not mistaken for an
# escaped newline/tab.
tsv_unescape() {
  awk '{
    out = ""
    len = length($0)
    for (i = 1; i <= len; i++) {
      c = substr($0, i, 1)
      if (c != "\\" || i == len) {
        out = out c
        continue
      }
      i++
      e = substr($0, i, 1)
      if (e == "t")
        out = out "\t"
      else if (e == "\\")
        out = out "\\"
      else if (e != "n")
        out = out c e
    }
    print out
  }'
}

# For every "<href>|<text>" entry in $repos, try each README candidate and
# store the extracted text as $DEST/$DIRBASE/<repo>/<candidate>.
# Entries are read line by line (instead of word-splitting $repos) so that
# they are never subject to pathname expansion.
while IFS= read -r r; do
  [ -n "$r" ] || continue
  # First "|"-separated field is the link; strip a trailing "log.html".
  link=${r%%|*}
  link=${link%%log.html}
  baselink=$(printf "%s/%s" "$URLBASE" "$link")
  printf "link: %s\nbaselink: %s\n" "$link" "$baselink" 1>&2
  REPODIR="$DEST/$DIRBASE/$link/"
  if ! mkdir -p -- "$REPODIR"; then
    printf "cannot create directory %s, skipping\n" "$REPODIR" 1>&2
    continue
  fi
  # $READMES is a space-separated list; splitting it is intended.
  for f in $READMES; do
    url="$baselink/$SUBPATH/$f"
    page="$REPODIR/$f.tmp"
    out="$REPODIR/$f"
    printf " trying file %s..." "$url"
    # $CURL is unquoted on purpose (command + options). stdin is taken from
    # /dev/null so the download can never consume the repository list that
    # feeds the enclosing loop.
    if ! $CURL "$url" < /dev/null > "$page"; then
      # The download itself failed.
      printf "[FAILED]\n"
    elif xml2tsv < "$page" 2>/dev/null |
      grep -Eaiq "^/html/head/title[[:blank:]]+404 Not Found"; then
      # The server answered with its "404 Not Found" page.
      printf "[FAILED]\n"
    else
      # Keep the rows of the numbered-line anchors, take the text columns
      # (field 6 onwards) and undo the escaping.
      xml2tsv < "$page" 2>/dev/null |
        grep -Eai "/html/body/div/pre/a[[:blank:]]+href=#.*[[:blank:]]+class=line" |
        cut -f 6- |
        tsv_unescape > "$out"
      if [ -s "$out" ]; then
        printf "[OK]\n"
      else
        # Nothing could be extracted: do not leave an empty README behind.
        rm -f -- "$out"
        printf "[FAILED]\n"
      fi
    fi
    rm -f -- "$page"
    # Pause between requests.
    sleep 1
  done
done <<EOF
$repos
EOF
|