2017-10-28

標準入力から数値列を読み込んで、昇順にソートして標準出力に吐き出す

Java C C++ PHP Python Ruby Perl Go bash

手元にある各言語で、標準入力から数値列を読み込んで、昇順にソートしたうえで標準出力に吐き出すプログラムを書いてみようと思ったメモ。

標準入力から入力される数値列の要件は以下の通り。

1行に1つの数値が書かれている
- 不正入力のチェックは不要とする
最大で256個の数値が入力される
入力される数値は符号付32ビット整数とする

環境

手元にあるものということで、環境は以下のものに限定する。

CentOS 7
- Java (openjdk version "1.8.0_131")
- C (gcc (GCC) 4.8.5)
  - -std=gnu11でコンパイル
- C++ (g++ (GCC) 4.8.5)
  - -std=gnu++1yでコンパイル
- PHP (PHP 5.4.16 (cli))
- Python 2 (Python 2.7.5)
- Python 3 (Python 3.6.3)
  - ソースからビルドしたもの
- Ruby (ruby 2.0.0p648)
- Perl (v5.16.3)
- Go (go version go1.8.3 linux/amd64)
- bash (4.2.46(1)-release)

入力ファイルの例

001.txt

0
1
-1
256
-256
32768
-32768
2147483647
-2147483648

期待される出力の例

001.txt

-2147483648
-32768
-256
-1
0
1
256
32768
2147483647

Java

入力される数値の上限数が分かっているということで、Javaでは2パターン作ってみた。

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Arrays;

/**
 * Reads one integer per line from standard input, sorts the values in
 * ascending order and prints them one per line to standard output.
 */
public class Main {
    public static void main(String[] args) {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
            PrintWriter writer = new PrintWriter(System.out)
        ) {
            // The input is specified to hold at most 256 numbers.
            int[] values = new int[256];
            int size = 0;
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                values[size] = Integer.parseInt(line);
                size++;
            }
            // Only the filled prefix of the array takes part in the sort.
            Arrays.sort(values, 0, size);
            for (int idx = 0; idx < size; idx++) {
                writer.println(values[idx]);
            }
        } catch (NumberFormatException e) {
            // Input validation is out of scope here, so just rethrow as a RuntimeException.
            throw new RuntimeException(e);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;

/**
 * Reads whitespace-separated integers from standard input with a Scanner,
 * sorts them in ascending order and prints them one per line.
 */
public class Main {
    public static void main(String[] args) {
        try (Scanner scanner = new Scanner(System.in);
            PrintWriter writer = new PrintWriter(System.out)
        ) {
            List<Integer> numbers = new ArrayList<>();
            // Stops at end of input or at the first token that is not an int.
            for (; scanner.hasNextInt(); ) {
                numbers.add(scanner.nextInt());
            }
            Collections.sort(numbers);
            for (int idx = 0; idx < numbers.size(); idx++) {
                Integer value = numbers.get(idx);
                writer.println(value);
            }
        }
    }
}

C

#include <stdio.h>
#include <stdlib.h>

/*
 * qsort() comparator for int elements.
 * Returns -1, 0 or 1 when *pa is less than, equal to or greater than *pb.
 * Comparisons are used instead of subtraction so extreme values cannot overflow.
 */
int cmp(const void* pa, const void* pb) {
    const int lhs = *(const int*)pa;
    const int rhs = *(const int*)pb;
    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Reads up to 256 integers from standard input, sorts them in ascending
 * order with qsort() and prints them one per line.
 */
int main(int argc, char** argv) {
    enum { MAX_COUNT = 256 };
    int ary[MAX_COUNT];
    int count = 0;
    /* "%d" matches the int element type ("%ld" would store a long into an
     * int slot); the bound check keeps extra input from overrunning ary. */
    while (count < MAX_COUNT && scanf("%d", &ary[count]) == 1) {
        ++count;
    }
    qsort(ary, count, sizeof(ary[0]), cmp);
    for (int i = 0; i < count; ++i) {
        printf("%d\n", ary[i]);
    }

    return 0;
}

C++

#include <algorithm>
#include <iostream>
#include <vector>

using namespace std;

// Reads integers from standard input until extraction fails, sorts them in
// ascending order and prints them one per line.
int main(int argc, char** argv) {
    vector<int> list;
    int num;

    while (cin >> num) {
        list.push_back(num);
    }
    sort(list.begin(), list.end());
    // Range-for avoids the signed/unsigned index comparison; '\n' avoids
    // flushing the stream after every line (it is flushed at exit).
    for (int value : list) {
        cout << value << '\n';
    }

    return EXIT_SUCCESS;
}

PHP

<?php

// Read every line of standard input, convert each to an integer,
// sort ascending and print one value per line.
$numbers = array();
foreach (file('php://stdin') as $row) {
    $numbers[] = intval($row);
}
sort($numbers);
foreach ($numbers as $value) {
    echo $value, "\n";
}

Python 2

import sys

list = []
while True:
    line = sys.stdin.readline()
    if line == '':
        break
    list.append(int(line))

list.sort()

for num in list:
    print num

Python 3

import sys

# Read one integer per line until EOF (readline() returns ''), then print
# the values in ascending order, one per line.
numbers = sorted(int(line) for line in iter(sys.stdin.readline, ''))

for value in numbers:
    print(value)

Ruby

# Read each line of standard input as an integer, sort ascending,
# and print one value per line.
numbers = STDIN.each_line.map { |line| line.to_i }
numbers.sort.each do |value|
    print value,"\n"
end

Perl

# Read all lines from standard input, coerce each to a number,
# sort numerically in ascending order and print one value per line.
my @list = sort { $a <=> $b } map { $_ + 0 } readline(STDIN);
foreach my $num (@list) {
    print "$num\n";
}

Go

package main

import (
    "bufio"
    "fmt"
    "io"
    "os"
    "strconv"
    "sort"
)

// main reads one integer per line from standard input, sorts the values in
// ascending order and prints them one per line.
func main() {
    // Pre-sized for the documented maximum of 256 inputs; append grows the
    // slice if more arrive instead of indexing past a fixed array.
    nums := make([]int, 0, 256)
    stdin := bufio.NewReader(os.Stdin)
    buf := make([]byte, 0, 1024)
    for {
        line, prefix, err := stdin.ReadLine()
        if err == io.EOF {
            break
        }
        if err != nil {
            // Any other read error would otherwise repeat forever.
            panic(err)
        }
        // ReadLine may hand back a long line in pieces; collect them first.
        buf = append(buf, line...)
        if prefix {
            continue
        }
        num, err2 := strconv.Atoi(string(buf))
        if err2 != nil {
            panic(err2)
        }
        nums = append(nums, num)
        buf = buf[:0]
    }
    sort.Ints(nums)
    for _, num := range nums {
        fmt.Println(num)
    }
}

これが最適なコードなのかどうかよく分からん‥

bash

#! /bin/bash

# Sort the lines of standard input by numeric value (ascending) and write
# them to standard output.
sort -n

2017-10-27

入力文字列をparseIntで解析する場合とScannerを使用する場合との速度の違い

Java

プログラミングコンテストとかでjava.util.Scannerを使って入力の数値を取得しようとすると、どうしてもInteger.parseIntした場合より実行速度が遅くなってしまうということで、どの程度のものなのかを簡単に調べてみたメモ。

検証環境

CentOS 7のVM(VirtualBox on Windows 10 on Let's Note CF-SX2)上で、OpenJDK 1.8.0_131を使用して測定。

ソースコード

parseInt版

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;

/**
 * Benchmark driver for the parseInt approach: reads lines of
 * space-separated integers from standard input, parses each token with
 * Integer.parseInt, echoes it to standard output and finally reports the
 * elapsed wall-clock time in milliseconds on standard error.
 */
public class Main {
    public static void main(String[] args) {
        long S = System.currentTimeMillis();

        try (BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            PrintWriter out = new PrintWriter(System.out)
        ) {
            String buf;
            while ((buf = in.readLine()) != null) {
                String[] tokens = buf.split(" ");
                for (String token : tokens) {
                    int num = Integer.parseInt(token);
                    // Write through the PrintWriter created above rather than
                    // System.out directly, so the output path is the same as
                    // in the Scanner variant this program is compared with.
                    out.println(num);
                }
            }
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }

        long G = System.currentTimeMillis();
        System.err.println((G - S) + "ms");
    }
}

Scanner版

import java.io.PrintWriter;
import java.util.Scanner;

/**
 * Benchmark driver for the Scanner approach: reads integers from standard
 * input with Scanner.nextInt, echoes each one to standard output and
 * reports the elapsed wall-clock time in milliseconds on standard error.
 */
public class Main {
    public static void main(String[] args) {
        final long startedAt = System.currentTimeMillis();

        Scanner scanner = new Scanner(System.in);
        PrintWriter writer = new PrintWriter(System.out);
        for (; scanner.hasNextInt(); ) {
            writer.println(scanner.nextInt());
        }
        writer.flush();

        final long finishedAt = System.currentTimeMillis();
        System.err.println((finishedAt - startedAt) + "ms");
    }
}

入力データ

パターン1

あまり入力数が多くないところで、1行に1数値の場合と1行に5数値の場合のデータを用意して比較。例えば、1行に5数値で1,000行の場合のデータは以下のような感じになる。

1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 4 4 4 4
5 5 5 5 5
：
：
996 996 996 996 996
997 997 997 997 997
998 998 998 998 998
999 999 999 999 999
1000 1000 1000 1000 1000

パターン2

では、本当にScannerの方がどんなケースでも遅いのか、と思って以下のパターンを試してみた。

N=10,000、100,000～1,000,000の11ケースで以下のような入力を食わせてみた。

1
2
3
4
5
：
：
N-4
N-3
N-2
N-1
N

測定結果

パターン1

こちらは予想通りというか、Scanner版の方が実行時間がかかっている。

N	parseInt(1)	Scanner(1)	parseInt(5)	Scanner(5)
0	0	36
1000	40	98	82	131
2000	51	121	130	175
3000	65	126	160	191
4000	69	130	183	246
5000	76	141	191	242
6000	79	153	210	267
7000	83	150	229	289
8000	108	163	257	323
9000	113	169	292	330

f:id:hhelibex:20171027220416p:plain

パターン2

N=200,000から300,000の辺りで速度が逆転している。大量の入力に対しては、なぜかparseInt版の方が速度的には不利なようだ。

N	parseInt	Scanner
10000	117	172
100000	452	471
200000	619	690
300000	804	761
400000	957	862
500000	1157	1060
600000	1276	1122
700000	1512	1105
800000	1692	1188
900000	1891	1321
1000000	1976	1412

f:id:hhelibex:20171027220508p:plain

2017-10-24

JDKのバージョンによる文字列連結処理の速度の違い

Java

過去にもなんか調べた気がしないでもないが、Java 9リリース記念ということで、Sun/Oracle JDK限定だが、JDKのバージョンを変えたときの「+」連結/StringBuffer/StringBuilderの速度の違いをざっと調べてみた。

平均値とか出すのが面倒だったので、測定は一発勝負(ぉ‥

測定環境

CentOS 6のVM(VirtualBox on Windows 10 on Let's Note CF-SX2)上で、以下のJava VMを使って測定(java -versionの出力)。

java version "1.4.2_19"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2_19-b04)
Java HotSpot(TM) Client VM (build 1.4.2_19-b04, mixed mode)

java version "1.5.0_22"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_22-b03)
Java HotSpot(TM) Client VM (build 1.5.0_22-b03, mixed mode, sharing)

java version "1.6.0_45"
Java(TM) SE Runtime Environment (build 1.6.0_45-b06)
Java HotSpot(TM) 64-Bit Server VM (build 20.45-b01, mixed mode)

java version "1.7.0_72"
Java(TM) SE Runtime Environment (build 1.7.0_72-b14)
Java HotSpot(TM) 64-Bit Server VM (build 24.72-b04, mixed mode)

java version "1.8.0_66"
Java(TM) SE Runtime Environment (build 1.8.0_66-b17)
Java HotSpot(TM) 64-Bit Server VM (build 25.66-b17, mixed mode)

java version "9"
Java(TM) SE Runtime Environment (build 9+181)
Java HotSpot(TM) 64-Bit Server VM (build 9+181, mixed mode)

ソースコードはこの後に載せるが、コンパイルは以下のような感じで行った。(Xxxの部分には「Plus」「StringBuffer」「StringBuilder」が入る)

「Main14Xxx」というクラス名のものはJDK 1.4.2_19のjavacでコンパイル
「Main15Xxx」というクラス名のものはJDK 1.5.0_22のjavacでコンパイル

ソースコード

MainNnPlus (Nnの部分には「14」「15」が入る)

// Micro-benchmark: times n string concatenations done with the += operator.
// The iteration count comes from args[0] (default 10000). Prints
// "<java.version>:<n>:<elapsed milliseconds>" to standard output.
public class MainNnPlus {
    public static void main(String[] args) {
        int n = args.length > 0 ? Integer.parseInt(args[0]) : 10000;

        // Start of the timed section.
        long S = System.currentTimeMillis();

        String s = "";
        for (int i = 0; i < n; ++i) {
            s += "a";
        }

        // End of the timed section.
        long G = System.currentTimeMillis();
        System.out.println(System.getProperty("java.version") + ":" + n + ":" + (G - S));
    }
}

MainNnStringBuffer (Nnの部分には「14」「15」が入る)

// Micro-benchmark: times n StringBuffer.append("a") calls.
// The iteration count comes from args[0] (default 10000). Prints
// "<java.version>:<n>:<elapsed milliseconds>" to standard output.
public class MainNnStringBuffer {
    public static void main(String[] args) {
        int n = args.length > 0 ? Integer.parseInt(args[0]) : 10000;

        // Start of the timed section.
        long S = System.currentTimeMillis();

        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < n; ++i) {
            sb.append("a");
        }

        // End of the timed section.
        long G = System.currentTimeMillis();
        System.out.println(System.getProperty("java.version") + ":" + n + ":" + (G - S));
    }
}

Main15StringBuilder

// Micro-benchmark: times n StringBuilder.append("a") calls.
// The iteration count comes from args[0] (default 10000). Prints
// "<java.version>:<n>:<elapsed milliseconds>" to standard output.
public class Main15StringBuilder {
    public static void main(String[] args) {
        int n = args.length > 0 ? Integer.parseInt(args[0]) : 10000;

        // Start of the timed section.
        long S = System.currentTimeMillis();

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < n; ++i) {
            sb.append("a");
        }

        // End of the timed section.
        long G = System.currentTimeMillis();
        System.out.println(System.getProperty("java.version") + ":" + n + ":" + (G - S));
    }
}

実行（測定）

実行（測定）は以下のようなシェルスクリプトを組んで行った。

#! /bin/bash

# Step and upper bound of n for the "+" concatenation runs (d1/max1) and
# for the StringBuffer/StringBuilder runs (d2/max2).
d1=3000
max1=30000
d2=10000000
max2=100000000

~/jdk/j2sdk1.4.2_19/bin/javac Main14*.java
~/jdk/jdk1.5.0_22/bin/javac Main15*.java

# measure STEP LIMIT CLASS...
# For each class, print a "=== CLASS" header and run it through run.sh for
# n = STEP, 2*STEP, ... up to LIMIT.
measure() {
    local step=$1
    local limit=$2
    shift 2
    local cls n
    for cls in "$@" ; do
        echo === ${cls}
        for ((n = step; n <= limit; n += step)); do
            ~/jdk/run.sh -Xmx1024m ${cls} ${n}
        done
    done
}

measure ${d1} ${max1} Main{14,15}Plus
measure ${d2} ${max2} Main{14,15}StringBuffer
measure ${d2} ${max2} Main15StringBuilder

~/jdk/run.shの中身は以下。

#! /bin/bash

# Runs the given java command line once with every JDK that is installed
# next to this script. All arguments are passed through to each java binary.

# Quote $0 so a script path containing spaces still resolves.
d=$(dirname "$0")

for jdk in j2sdk1.4.2_19 jdk1.5.0_22 jdk1.6.0_45 jdk1.7.0_72 jdk1.8.0_66 jdk-9 ; do
    if [ -d "${d}/${jdk}" ]; then
        # "$@" forwards every argument unchanged (no word splitting/globbing).
        "${d}/${jdk}/bin/java" "$@"
    fi
done

測定結果

Main14Plus

	1.4.2_19	1.5.0_22	1.6.0_45	1.7.0_72	1.8.0_66	9
3000	11	19	16	22	15	17
6000	28	47	63	72	50	36
9000	57	99	94	103	82	66
12000	113	190	150	154	131	89
15000	216	289	235	213	177	116
18000	357	459	292	225	244	169
21000	538	742	374	264	331	192
24000	793	1064	485	291	402	256
27000	1104	1538	593	319	517	296
30000	1469	2042	726	356	596	352

f:id:hhelibex:20171024225451p:plain

Main15Plus

	1.5.0_22	1.6.0_45	1.7.0_72	1.8.0_66	9
3000	15	17	22	17	16
6000	47	57	70	49	34
9000	103	90	103	78	56
12000	178	144	152	120	85
15000	291	218	196	173	111
18000	476	292	227	253	150
21000	743	380	255	307	182
24000	1075	494	282	394	232
27000	1552	613	321	495	291
30000	2092	751	367	626	353

f:id:hhelibex:20171024225701p:plain

Main14StringBuffer

	1.4.2_19	1.5.0_22	1.6.0_45	1.7.0_72	1.8.0_66	9
10000000	389	269	211	208	191	142
20000000	771	519	397	422	357	241
30000000	1125	809	542	583	500	339
40000000	1541	1068	756	828	716	471
50000000	1993	1310	939	1009	855	584
60000000	2264	1564	1412	1291	1053	839
70000000	2704	1783	1272	1379	1142	760
80000000	3185	2246	1789	1909	1703	1034
90000000	3982	3027	1837	2290	1824	1070
100000000	4518	3455	2542	2108	1665	1120

f:id:hhelibex:20171024225800p:plain

Main15StringBuffer

	1.5.0_22	1.6.0_45	1.7.0_72	1.8.0_66	9
10000000	369	331	330	272	169
20000000	544	409	608	523	362
30000000	778	659	649	487	444
40000000	1601	751	1173	969	444
50000000	1305	953	1101	1219	762
60000000	2030	1068	1223	1413	920
70000000	2207	1359	1570	1133	707
80000000	2772	1821	2046	1670	889
90000000	2589	2355	2107	1924	1414
100000000	3042	2324	2224	2274	1180

f:id:hhelibex:20171024225915p:plain

Main15StringBuilder

	1.5.0_22	1.6.0_45	1.7.0_72	1.8.0_66	9
10000000	349	213	203	168	63
20000000	440	286	331	318	157
30000000	808	324	315	298	131
40000000	906	543	655	617	242
50000000	1488	817	778	713	252
60000000	1388	639	636	687	328
70000000	2010	1032	804	644	266
80000000	1782	1309	1183	1047	507
90000000	2530	1117	996	951	387
100000000	2694	1389	1421	1413	567

f:id:hhelibex:20171024230039p:plain

2017-10-23

C++の標準入出力についてのメモ

C++

プログラミングコンテストなどでよく見かける以下のコード断片を「おまじない」で片づけるのが嫌だったので調べてみたメモ。

ios_base::sync_with_stdio(false);
cin.tie(NULL);

答えは以下のサイトで解説されているのだが‥

c++ - Significance of ios_base::sync_with_stdio(false); cin.tie(NULL); - Stack Overflow

試してみないと気が済まなかったので試してみた。

`ios_base::sync_with_stdio(false)`の検証

以下のようなコードを実行してみる。

#include <cstdio>
#include <iostream>

using namespace std;

// Writes 'a' through cout and "A" through printf, alternating, n times,
// then ends the line. When sync is true each write is followed by an
// explicit flush of the stream it was written to.
void test1(int n, bool sync = false) {
    for (int i = 0; i < n; ++i) {
        cout << 'a';
        if (sync) {
            flush(cout);
        }
        printf("A");
        if (sync) {
            fflush(stdout);
        }
    }

    // stdout is flushed first; endl then writes the newline and flushes cout.
    fflush(stdout);
    cout << endl;
}

// Runs test1 three times: with the default stream state, after calling
// sync_with_stdio(false), and finally with explicit flushes after each write.
int main(int argc, char* argv[]) {
    int n = 10;

    // 1) Default state, no manual flushing.
    test1(n);

    // 2) After sync_with_stdio(false), still no manual flushing.
    ios_base::sync_with_stdio(false);
    test1(n);

    // 3) sync_with_stdio(false) still in effect, flushing after every write.
    test1(n, true);
}

これを実行すると、以下のような出力が得られる。

aAaAaAaAaAaAaAaAaAaA
AAAAAAAAAAaaaaaaaaaa
aAaAaAaAaAaAaAaAaAaA

1行目の出力は、coutとstdoutが同期されている状態なので、flushとかfflushをしなくても、それぞれに書き込んだ文字が交互に出力されることの確認。
2行目の出力は、coutとstdoutの同期を切っているので、末尾でfflushを先にしているstdoutへの出力が先にまとめて出てきて、その後にcoutへの出力がまとめて出てくることの確認。
3行目の出力は、手動で同期をしているので、1行目と同じ出力になることの確認。

上記サイトの回答にあるように、これの副作用として標準出力への出力のパフォーマンスが上がるというわけか。

`cin.tie(NULL)`の検証

以下のようなコードを実行してみる。

#include <iostream>

using namespace std;

// Prints the prompt "Enter name:" to cout, reads one word from cin and
// echoes it on its own line. When tie is true the prompt is flushed
// explicitly before reading.
void test2(bool tie = false) {
    string name;

    cout << "Enter name:";
    if (tie) {
        flush(cout);
    }
    cin >> name;
    cout << name << endl;
}

// Runs test2 four times, changing the stream configuration between runs.
int main(int argc, char* argv[]) {
    // 1) Default stream state.
    test2();

    // 2) After cin.tie(NULL).
    cin.tie(NULL);
    test2();

    // 3) Additionally after sync_with_stdio(false).
    ios_base::sync_with_stdio(false);
    test2();

    // 4) Same configuration, but the prompt is flushed manually.
    test2(true);
}

これを実行し、4回の入力が求められるので、"A"、"B"、"C"、"D"を順にコンソールから入力した結果が以下。

Enter name:A
A
Enter name:B
B
C
Enter name:C
Enter name:D
D

1回目は、cinとcoutが結び付けられた状態なので、cinで読み込む前にcoutに書き込んだプロンプト("Enter name:")が画面に出てくることの確認。
2回目は、cinとcoutの結びつきを切ってみたのだが、1回目と出力が変わらなかった。
3回目は、ならば、と先に試したios_base::sync_with_stdio(false)をやってみたらどうだろうと試した結果、今度は期待通りにcoutに書き込んだプロンプトが即座には出てこなかった。
4回目は、手動でflushしているので、1回目と同じ結果になることの確認。

cin.tie(NULL)だけでは動作上の変化が無くて、ios_base::sync_with_stdio(false)も合わせてしてやらないといけないらしい。cinから読み込むときに、stdioとの同期が有効になっているとstdinとも結びついていることになるから、同期しようとして2回目のケースでプロンプトが先に出てくるということか。

2017-10-18

標準入力から1行ずつ読み込んで数値解析して標準出力に吐き出す

Java C C++ PHP Python Ruby Perl Go bash Awk

唐突に、手元にある各言語で標準入力から1行ずつ読み込んで、行の先頭の数値として解析できる部分を数値に変換して標準出力に吐き出すプログラムを書いてみようと思ったメモ。

例えば、「+123i456」という行があったら、「+123」までが数値として解析できる(その後ろの「i」が数値を構成する要素でない)ので、「123」と出力する(数値型に変換したら、普通の言語では「+」記号は出力されないため)。

環境

手元にあるものということで、環境は以下のものに限定する。

CentOS 7
- Java (openjdk version "1.8.0_131")
- C (gcc (GCC) 4.8.5)
  - -std=gnu11でコンパイル
- C++ (g++ (GCC) 4.8.5)
  - -std=gnu++1yでコンパイル
- PHP (PHP 5.4.16 (cli))
- Python 2 (Python 2.7.5)
- Python 3 (Python 3.6.3)
  - ソースからビルドしたもの
- Ruby (ruby 2.0.0p648)
- Perl (v5.16.3)
- Go (go version go1.8.3 linux/amd64)
- bash (4.2.46(1)-release)
- Awk (GNU Awk 4.0.2)

入力ファイル

001.txt

期待される出力

001.txt

Java

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Reads standard input line by line, keeps the leading optionally-signed
 * run of digits of each line, parses it as an int and prints it.
 */
public class Main {
    public static void main(String[] args) {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                // Replace the whole line by its numeric prefix (capture group 1).
                String leading = line.replaceAll("^([+-]?[0-9]+).*$", "$1");
                int value = Integer.parseInt(leading);
                System.out.println(value);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

C

#include <stdio.h>

/*
 * Reads standard input line by line, parses the leading integer of each
 * line with sscanf() and prints it. Lines with no leading integer print 0.
 */
int main(int argc, char** argv) {
    char buf[1024];

    while (fgets(buf, sizeof(buf), stdin)) {
        /* Start from 0 so a line sscanf() cannot convert prints a defined
         * value instead of an uninitialized one. */
        int num = 0;
        if (sscanf(buf, "%d", &num) != 1) {
            num = 0;
        }
        printf("%d\n", num);
    }

    return 0;
}

C++

#include <iostream>

using namespace std;

// Reads standard input line by line, parses the leading integer of each
// line with sscanf and prints it. Lines with no leading integer print 0.
// NOTE(review): sscanf and EXIT_SUCCESS are declared in <cstdio>/<cstdlib>,
// which this snippet does not include explicitly — confirm they stay
// reachable through <iostream> on the target toolchain.
int main(int argc, char** argv) {
    char buf[1024];

    while (cin.getline(buf, sizeof(buf))) {
        // Start from 0 so a line sscanf cannot convert prints a defined
        // value instead of an uninitialized one.
        int num = 0;
        if (sscanf(buf, "%d", &num) != 1) {
            num = 0;
        }
        cout << num << endl;
    }

    return EXIT_SUCCESS;
}

PHP

<?php

// Print the integer value of the leading numeric part of every input line.
foreach (file('php://stdin') as $row) {
    echo intval($row), "\n";
}

Python 2

import sys
import re

patStr = r'^[+-]?[0-9]+'
pattern = re.compile(patStr)

while True:
    line = sys.stdin.readline()
    if line == '':
        break
    matcher = pattern.match(line)
    line = line[matcher.start():matcher.end()]
    num = int(line)
    print num

Python 3

import sys
import re

# Optional sign followed by the leading run of decimal digits.
pattern = re.compile(r'^[+-]?[0-9]+')

# readline() returns '' at EOF, which ends the iteration.
for line in iter(sys.stdin.readline, ''):
    leading = pattern.match(line).group(0)
    print(int(leading))

Ruby

# Print the integer value of the leading numeric part of every input line.
STDIN.each_line do |line|
    print line.to_i,"\n"
end

Perl

# Print the numeric value of the leading numeric part of every input line.
foreach my $line (readline(STDIN)) {
    my $num = $line + 0;
    print "$num\n";
}

Go

package main

import (
    "bufio"
    "fmt"
    "io"
    "os"
    "strconv"
    "regexp"
)

// main reads standard input line by line, keeps the leading
// optionally-signed run of digits of each line, converts it to an int and
// prints it.
func main() {
    // Compile the pattern once instead of once per input line.
    leadingInt := regexp.MustCompile(`^([+-]?[0-9]+).*`)
    stdin := bufio.NewReader(os.Stdin)
    buf := make([]byte, 0, 1024)
    for {
        line, prefix, err := stdin.ReadLine()
        if err == io.EOF {
            break
        }
        if err != nil {
            // Any other read error would otherwise repeat forever.
            panic(err)
        }
        // ReadLine may hand back a long line in pieces; collect them first.
        buf = append(buf, line...)
        if prefix {
            continue
        }
        s := leadingInt.ReplaceAllString(string(buf), "$1")
        num, err2 := strconv.Atoi(s)
        if err2 != nil {
            panic(err2)
        }
        fmt.Println(num)
        buf = buf[:0]
    }
}

bash

#! /bin/bash

# Prints the integer value of the leading optionally-signed run of digits of
# every input line. Lines that do not start with a number print 0.

# Optional sign (group 1) followed by the leading digits (group 2).
re='^([+-]?)([0-9]+)'

# -r keeps backslashes literal; the || clause still processes a final line
# that has no trailing newline.
while IFS= read -r line || [ -n "${line}" ]; do
    num=0
    if [[ ${line} =~ ${re} ]]; then
        # 10# forces base 10 so leading zeros are not read as octal, and only
        # matched digits ever reach arithmetic evaluation.
        num=$((10#${BASH_REMATCH[2]}))
        if [ "${BASH_REMATCH[1]}" = "-" ]; then
            num=$((-num))
        fi
    fi
    echo "${num}"
done

Awk

# For every record, print the numeric value of its leading optionally-signed
# run of digits; a record without such a prefix is converted as a whole.
{
    lead = match($0, /^[-+]?[0-9]+/) ? substr($0, RSTART, RLENGTH) : $0;
    print lead + 0;
}

2017-10-17

標準入力から1バイトずつ読み込んで、大文字小文字変換をして標準出力に吐き出すプログラムを書いてみる

Java C C++ PHP Python Ruby Perl Go bash

唐突に、手元にある各言語で標準入力から1バイトずつ読み込んで、大文字小文字変換をしたうえで標準出力に吐き出すプログラムを書いてみようと思ったメモ。

大文字小文字判定等を行う関数をまじめに使った言語もあれば、正規表現に頼った言語もあったり、果てはASCII文字のコード値に頼ったプログラムになる言語があったりといろいろだが、とりあえず現状の知識ということで気にしない。

(2017/10/18追記)Go言語でbyteからruneに変換すればunicodeパッケージの関数で対応できることが分かったので書き換え。

環境

手元にあるものということで、環境は以下のものに限定する。

CentOS 7
- Java (openjdk version "1.8.0_131")
- C (gcc (GCC) 4.8.5)
  - -std=gnu11でコンパイル
- C++ (g++ (GCC) 4.8.5)
  - -std=gnu++1yでコンパイル
- PHP (PHP 5.4.16 (cli))
- Python 2 (Python 2.7.5)
- Python 3 (Python 3.6.3)
  - ソースからビルドしたもの
- Ruby (ruby 2.0.0p648)
- Perl (v5.16.3)
- Go (go version go1.8.3 linux/amd64)
- bash (4.2.46(1)-release)

入力ファイル

od -cした結果を載せておく。

001.txt

0000000   H   e   l   l   o       W   o   r   l   d   !  \n
0000015

002.txt

0000000       !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
0000020   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
0000040   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
0000060   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
0000100   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
0000120   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  \n
0000140

Java

import java.io.IOException;

/**
 * Copies standard input to standard output one byte at a time, swapping
 * the case of ASCII letters. All other bytes are passed through unchanged.
 */
public class Main {
    public static void main(String[] args) {
        try {
            int ch;
            while ((ch = System.in.read()) != -1) {
                // Only ASCII bytes are case-mapped. Treating bytes 0x80-0xFF
                // as code points would alter them (some map to values above
                // 0xFF, of which write() keeps only the low byte).
                if (ch < 0x80) {
                    if (Character.isLowerCase(ch)) {
                        ch = Character.toUpperCase(ch);
                    } else if (Character.isUpperCase(ch)) {
                        ch = Character.toLowerCase(ch);
                    }
                }
                System.out.write(ch);
            }
            System.out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

C

#include <stdio.h>
#include <ctype.h>

/*
 * Copies standard input to standard output one character at a time,
 * swapping upper- and lower-case letters.
 */
int main(int argc, char** argv) {
    for (int ch = getchar(); ch != EOF; ch = getchar()) {
        int swapped = ch;
        if (islower(ch)) {
            swapped = toupper(ch);
        } else if (isupper(ch)) {
            swapped = tolower(ch);
        }
        putchar(swapped);
    }

    return 0;
}

C++

#include <iostream>

using namespace std;

// Copies standard input to standard output one character at a time,
// swapping upper- and lower-case letters.
int main(int argc, char** argv) {
    char ch;

    while (cin.get(ch)) {
        // The <cctype> functions require a value representable as unsigned
        // char (or EOF); a plain char may be negative for bytes >= 0x80.
        const unsigned char uch = static_cast<unsigned char>(ch);
        if (islower(uch)) {
            ch = static_cast<char>(toupper(uch));
        } else if (isupper(uch)) {
            ch = static_cast<char>(tolower(uch));
        }
        cout << ch;
    }

    return EXIT_SUCCESS;
}

cctypeはincludeしなくていいのか？と思ったら、iostreamの中身を追っていくと、cctypeをincludeしているので、自分で明示する必要が無いのだった。

PHP

<?php

// Copy stdin to stdout one character at a time, swapping the case of
// ASCII letters (detected with regular expressions).
$in = fopen('php://stdin', 'r');
$out = fopen('php://stdout', 'w');
for ($ch = fgetc($in); $ch !== FALSE; $ch = fgetc($in)) {
    if (preg_match('/[a-z]/', $ch)) {
        fwrite($out, strtoupper($ch));
    } else if (preg_match('/[A-Z]/', $ch)) {
        fwrite($out, strtolower($ch));
    } else {
        fwrite($out, $ch);
    }
}

最初、ctype系の関数を使おうと思ったのだが、以下の記事を読んだら嫌な予感がしてきたので、正規表現で判定するようにした。

ctype_digit関数の罠 - hnwの日記

Python 2

import sys;

# Copy stdin to stdout one character at a time, swapping letter case.
# read(1) returns '' at EOF, which ends the iteration.
for ch in iter(lambda: sys.stdin.read(1), ''):
    if ch.islower():
        sys.stdout.write(ch.upper())
    elif ch.isupper():
        sys.stdout.write(ch.lower())
    else:
        sys.stdout.write(ch)

Pythonのislower/isupperには以下のような罠が潜んでいるらしいので注意。今回は1文字ずつに切り分けているのでハマることはなかったが。

Pythonの islower() と isupper() の判定処理のワナ - 強火で進め

Python 3

import sys;

# Copy stdin to stdout one byte at a time, swapping letter case.
# read(1) returns b'' at EOF, which ends the iteration.
for ch in iter(lambda: sys.stdin.buffer.read(1), b''):
    if ch.islower():
        sys.stdout.buffer.write(ch.upper())
    elif ch.isupper():
        sys.stdout.buffer.write(ch.lower())
    else:
        sys.stdout.buffer.write(ch)

Ruby

# Copy stdin to stdout one character at a time with its case swapped.
STDIN.each_char do |ch|
    STDOUT.putc(ch.swapcase.chr)
end

大文字小文字判定をするメソッドが見つけられなかったので逃げた例‥

Perl

# Copy STDIN to STDOUT one byte at a time, swapping the case of ASCII letters.
binmode(STDIN);
# read() returns 0 at end of file and undef on error; both end the loop.
while (read(STDIN, $ch, 1)) {
    $ch =~ tr/a-zA-Z/A-Za-z/;
    print $ch;
}

Go

初期版

package main

import (
    "bufio"
    "io"
    "os"
)

// main copies standard input to standard output one byte at a time,
// swapping the case of ASCII letters by their code values.
func main() {
    stdin := bufio.NewReader(os.Stdin)
    stdout := bufio.NewWriter(os.Stdout)
    for {
        ch, err := stdin.ReadByte()
        if err == io.EOF {
            break
        }
        if err != nil {
            // Any other read error would otherwise repeat forever; keep
            // what was already buffered, then stop.
            stdout.Flush()
            panic(err)
        }
        switch {
        case 'a' <= ch && ch <= 'z':
            ch -= 'a' - 'A'
        case 'A' <= ch && ch <= 'Z':
            ch += 'a' - 'A'
        }
        stdout.WriteByte(ch)
    }
    stdout.Flush()
}

byteをbyteのままで大文字小文字判定および変換する手段を見つけられずに、ASCIIコードのコード値に頼ってしまいました‥

2017/10/18書き換え版

package main

import (
    "bufio"
    "io"
    "os"
    "unicode"
)

// main copies standard input to standard output one byte at a time,
// swapping the case of ASCII letters via the unicode package. Bytes outside
// the ASCII range are passed through unchanged.
func main() {
    stdin := bufio.NewReader(os.Stdin)
    stdout := bufio.NewWriter(os.Stdout)
    for {
        ch, err := stdin.ReadByte()
        if err == io.EOF {
            break
        }
        if err != nil {
            // Any other read error would otherwise repeat forever; keep
            // what was already buffered, then stop.
            stdout.Flush()
            panic(err)
        }
        // Only ASCII bytes are case-mapped, and the result is written back
        // as a single byte. Writing rune(ch) with WriteRune would UTF-8
        // encode every byte >= 0x80 into two bytes.
        if ch <= unicode.MaxASCII {
            r := rune(ch)
            if unicode.IsLower(r) {
                ch = byte(unicode.ToUpper(r))
            } else if unicode.IsUpper(r) {
                ch = byte(unicode.ToLower(r))
            }
        }
        stdout.WriteByte(ch)
    }
    stdout.Flush()
}

bash

#! /bin/bash

# Copy stdin to stdout one character at a time with letter case swapped.
# IFS= and -r stop read from trimming whitespace or interpreting
# backslashes; -N 1 reads exactly one character per iteration.
while IFS= read -r -N 1 ch ; do
    # tr maps lower-case letters to upper-case and vice versa.
    printf "%c" "${ch}" | tr 'a-zA-Z' 'A-Za-z'
done

2017-10-15

各言語で標準入力から1バイトずつ読み込んで標準出力に吐き出すプログラムを書いてみる

Java C C++ PHP Python Ruby Perl Go bash

唐突に、手元にある各言語で標準入力から1バイトずつ読み込んで標準出力にそのまま吐き出すプログラムを書いてみようと思ったメモ。

普段は使わない言語も混じっているが、まぁ気にしない。

環境

手元にあるものということで、環境は以下のものに限定する。

CentOS 7
- Java (openjdk version "1.8.0_131")
- C (gcc (GCC) 4.8.5)
  - -std=gnu11でコンパイル
- C++ (g++ (GCC) 4.8.5)
  - -std=gnu++1yでコンパイル
- PHP (PHP 5.4.16 (cli))
- Python 2 (Python 2.7.5)
- Python 3 (Python 3.6.3)
  - ソースからビルドしたもの
- Ruby (ruby 2.0.0p648)
- Perl (v5.16.3)
- Go (go version go1.8.3 linux/amd64)
- bash (4.2.46(1)-release)

入力ファイル

所謂バイナリデータも含むので、od -cした結果を載せておく。

001.txt

0000000   H   e   l   l   o       W   o   r   l   d   !  \n
0000015

002.txt

0000000  \0 001 002 003 004 005 006  \a  \b  \t  \n  \v  \f  \r 016 017
0000020 020 021 022 023 024 025 026 027 030 031 032 033 034 035 036 037
0000040       !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
0000060   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
0000100   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
0000120   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
0000140   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
0000160   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~ 177
0000200 200 201 202 203 204 205 206 207 210 211 212 213 214 215 216 217
0000220 220 221 222 223 224 225 226 227 230 231 232 233 234 235 236 237
0000240 240 241 242 243 244 245 246 247 250 251 252 253 254 255 256 257
0000260 260 261 262 263 264 265 266 267 270 271 272 273 274 275 276 277
0000300 300 301 302 303 304 305 306 307 310 311 312 313 314 315 316 317
0000320 320 321 322 323 324 325 326 327 330 331 332 333 334 335 336 337
0000340 340 341 342 343 344 345 346 347 350 351 352 353 354 355 356 357
0000360 360 361 362 363 364 365 366 367 370 371 372 373 374 375 376 377
0000400

2つ目のファイルは、制御文字も含めて、8ビットcharとしてありうるものを256個すべて並べたもの。

Java

import java.io.IOException;

/**
 * Copies standard input to standard output one byte at a time.
 */
public class Main {
    public static void main(String[] args) {
        try {
            // read() returns -1 once the end of the stream is reached.
            for (int b = System.in.read(); b != -1; b = System.in.read()) {
                System.out.write(b);
            }
            System.out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

C

#include <stdio.h>

/* Copies standard input to standard output one character at a time. */
int main(int argc, char** argv) {
    for (int ch = getchar(); ch != EOF; ch = getchar()) {
        putchar(ch);
    }

    return 0;
}

C++

#include <iostream>

using namespace std;

// Copies standard input to standard output one character at a time.
int main(int argc, char** argv) {
    for (char ch; cin.get(ch); ) {
        cout << ch;
    }

    return EXIT_SUCCESS;
}

PHP

<?php

// Copy stdin to stdout one character at a time.
$in = fopen('php://stdin', 'r');
$out = fopen('php://stdout', 'w');
for ($ch = fgetc($in); $ch !== FALSE; $ch = fgetc($in)) {
    fwrite($out, $ch);
}

Python 2

import sys;

# Copy stdin to stdout one character at a time.
# read(1) returns '' at EOF, which ends the iteration.
for ch in iter(lambda: sys.stdin.read(1), ''):
    sys.stdout.write(ch)

Python 3

import sys;

# Copy stdin to stdout one byte at a time.
# read(1) returns b'' at EOF, which ends the iteration.
for ch in iter(lambda: sys.stdin.buffer.read(1), b''):
    sys.stdout.buffer.write(ch)

Ruby

# Copy stdin to stdout one character at a time.
STDIN.each_char do |ch|
    STDOUT.putc(ch.chr)
end

Perl

# Copy STDIN to STDOUT one byte at a time.
binmode(STDIN);
# read() returns 0 at end of file and undef on error; both end the loop.
print $ch while read(STDIN, $ch, 1);

Go

package main

import (
    "bufio"
    "io"
    "os"
)

// main copies standard input to standard output one byte at a time through
// buffered reader/writer wrappers.
func main() {
    stdin := bufio.NewReader(os.Stdin)
    stdout := bufio.NewWriter(os.Stdout)
    for {
        ch, err := stdin.ReadByte()
        if err == io.EOF {
            break
        }
        if err != nil {
            // Any other read error would otherwise repeat forever; keep
            // what was already buffered, then stop.
            stdout.Flush()
            panic(err)
        }
        stdout.WriteByte(ch)
    }
    stdout.Flush()
}

bash

実を言うと、bashの場合は2つ目のファイルで正しい結果が得られないという問題がある。readコマンドで読み込むときに1バイトを超えて読み込む場合があるようで、後半部分の出力が期待値と合わない。ただ、7ビットASCIIの範囲では問題なく読み書きできているので一応載せておく。

#! /bin/bash

# Copy stdin to stdout one character at a time.
# IFS= and -r stop read from trimming whitespace or interpreting
# backslashes; -N 1 reads exactly one character per iteration.
while IFS= read -r -N 1 ch ; do
    printf "%c" "${ch}"
done

環境

入力ファイルの例

期待される出力の例

C

Python 2

Python 3

Go

検証環境

parseInt版

Scanner版

入力データ

パターン1

パターン2

測定結果

パターン1

パターン2

測定環境

MainNnPlus (Nnの部分には「14」「15」が入る)

MainNnStringBuffer (Nnの部分には「14」「15」が入る)

Main15StringBuilder

実行（測定）

測定結果

Main14Plus

Main15Plus

Main14StringBuffer

Main15StringBuffer

Main15StringBuilder

ios_base::sync_with_stdio(false)の検証

cin.tie(NULL)の検証

環境

入力ファイル

期待される出力

C

Python 2

Python 3

Go

環境

入力ファイル

C

Python 2

Python 3

Go

環境

入力ファイル

C

Python 2

Python 3

Go

`ios_base::sync_with_stdio(false)`の検証

`cin.tie(NULL)`の検証